; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL

;
; vXf32
;

define float @test_v2f32(<2 x float> %a0) {
; SSE2-LABEL: test_v2f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v2f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v2f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v2f32(<2 x float> %a0)
  ret float %1
}

define float @test_v4f32(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %a0)
  ret float %1
}

define float @test_v8f32(<8 x float> %a0) {
; SSE2-LABEL: test_v8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v8f32(<8 x float> %a0)
  ret float %1
}

define float @test_v16f32(<16 x float> %a0) {
; SSE2-LABEL: test_v16f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    minps %xmm3, %xmm1
; SSE2-NEXT:    minps %xmm2, %xmm0
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    minps %xmm3, %xmm1
; SSE41-NEXT:    minps %xmm2, %xmm0
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v16f32(<16 x float> %a0)
  ret float %1
}

;
; vXf64
;

define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %a0)
  ret double %1
}

define double @test_v4f64(<4 x double> %a0) {
; SSE-LABEL: test_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v4f64(<4 x double> %a0)
  ret double %1
}

define double @test_v8f64(<8 x double> %a0) {
; SSE-LABEL: test_v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm3, %xmm1
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v8f64(<8 x double> %a0)
  ret double %1
}

define double @test_v16f64(<16 x double> %a0) {
; SSE-LABEL: test_v16f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm6, %xmm2
; SSE-NEXT:    minpd %xmm4, %xmm0
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    minpd %xmm7, %xmm3
; SSE-NEXT:    minpd %xmm5, %xmm1
; SSE-NEXT:    minpd %xmm3, %xmm1
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %ymm3, %ymm1, %ymm1
; AVX-NEXT:    vminpd %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v16f64(<16 x double> %a0)
  ret double %1
}

declare float @llvm.experimental.vector.reduce.fmin.f32.v2f32(<2 x float>)
declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float>)
declare float @llvm.experimental.vector.reduce.fmin.f32.v8f32(<8 x float>)
declare float @llvm.experimental.vector.reduce.fmin.f32.v16f32(<16 x float>)

declare double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double>)
declare double @llvm.experimental.vector.reduce.fmin.f64.v4f64(<4 x double>)
declare double @llvm.experimental.vector.reduce.fmin.f64.v8f64(<8 x double>)
declare double @llvm.experimental.vector.reduce.fmin.f64.v16f64(<16 x double>)