1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL 8 9; 10; vXi64 11; 12 13define i64 @test_v2i64(<2 x i64> %a0) { 14; SSE2-LABEL: test_v2i64: 15; SSE2: # %bb.0: 16; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 17; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] 18; SSE2-NEXT: movdqa %xmm0, %xmm3 19; SSE2-NEXT: pxor %xmm2, %xmm3 20; SSE2-NEXT: pxor %xmm1, %xmm2 21; SSE2-NEXT: movdqa %xmm3, %xmm4 22; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 23; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 24; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 25; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 26; SSE2-NEXT: pand %xmm5, %xmm2 27; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 28; SSE2-NEXT: por %xmm2, %xmm3 29; SSE2-NEXT: pand %xmm3, %xmm0 30; SSE2-NEXT: pandn %xmm1, %xmm3 31; SSE2-NEXT: por %xmm0, %xmm3 32; SSE2-NEXT: movq %xmm3, %rax 33; SSE2-NEXT: retq 34; 35; SSE41-LABEL: test_v2i64: 36; SSE41: # %bb.0: 37; SSE41-NEXT: movdqa %xmm0, %xmm1 38; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 39; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,0,2147483648,0] 40; SSE41-NEXT: movdqa %xmm1, %xmm3 41; 
SSE41-NEXT: pxor %xmm0, %xmm3 42; SSE41-NEXT: pxor %xmm2, %xmm0 43; SSE41-NEXT: movdqa %xmm3, %xmm4 44; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 45; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 46; SSE41-NEXT: pcmpeqd %xmm3, %xmm0 47; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 48; SSE41-NEXT: pand %xmm5, %xmm3 49; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] 50; SSE41-NEXT: por %xmm3, %xmm0 51; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2 52; SSE41-NEXT: movq %xmm2, %rax 53; SSE41-NEXT: retq 54; 55; AVX-LABEL: test_v2i64: 56; AVX: # %bb.0: 57; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 58; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 59; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 60; AVX-NEXT: vmovq %xmm0, %rax 61; AVX-NEXT: retq 62; 63; AVX512BW-LABEL: test_v2i64: 64; AVX512BW: # %bb.0: 65; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 66; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 67; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 68; AVX512BW-NEXT: vmovq %xmm0, %rax 69; AVX512BW-NEXT: vzeroupper 70; AVX512BW-NEXT: retq 71; 72; AVX512VL-LABEL: test_v2i64: 73; AVX512VL: # %bb.0: 74; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 75; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 76; AVX512VL-NEXT: vmovq %xmm0, %rax 77; AVX512VL-NEXT: retq 78 %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> %a0) 79 ret i64 %1 80} 81 82define i64 @test_v4i64(<4 x i64> %a0) { 83; SSE2-LABEL: test_v4i64: 84; SSE2: # %bb.0: 85; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] 86; SSE2-NEXT: movdqa %xmm1, %xmm3 87; SSE2-NEXT: pxor %xmm2, %xmm3 88; SSE2-NEXT: movdqa %xmm0, %xmm4 89; SSE2-NEXT: pxor %xmm2, %xmm4 90; SSE2-NEXT: movdqa %xmm4, %xmm5 91; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 92; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 93; SSE2-NEXT: pcmpeqd %xmm3, %xmm4 94; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 95; SSE2-NEXT: pand %xmm6, %xmm3 96; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] 97; SSE2-NEXT: por %xmm3, 
%xmm4 98; SSE2-NEXT: pand %xmm4, %xmm0 99; SSE2-NEXT: pandn %xmm1, %xmm4 100; SSE2-NEXT: por %xmm0, %xmm4 101; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1] 102; SSE2-NEXT: movdqa %xmm4, %xmm1 103; SSE2-NEXT: pxor %xmm2, %xmm1 104; SSE2-NEXT: pxor %xmm0, %xmm2 105; SSE2-NEXT: movdqa %xmm1, %xmm3 106; SSE2-NEXT: pcmpgtd %xmm2, %xmm3 107; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 108; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 109; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 110; SSE2-NEXT: pand %xmm5, %xmm1 111; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 112; SSE2-NEXT: por %xmm1, %xmm2 113; SSE2-NEXT: pand %xmm2, %xmm4 114; SSE2-NEXT: pandn %xmm0, %xmm2 115; SSE2-NEXT: por %xmm4, %xmm2 116; SSE2-NEXT: movq %xmm2, %rax 117; SSE2-NEXT: retq 118; 119; SSE41-LABEL: test_v4i64: 120; SSE41: # %bb.0: 121; SSE41-NEXT: movdqa %xmm0, %xmm2 122; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,0,2147483648,0] 123; SSE41-NEXT: movdqa %xmm1, %xmm0 124; SSE41-NEXT: pxor %xmm3, %xmm0 125; SSE41-NEXT: movdqa %xmm2, %xmm4 126; SSE41-NEXT: pxor %xmm3, %xmm4 127; SSE41-NEXT: movdqa %xmm4, %xmm5 128; SSE41-NEXT: pcmpgtd %xmm0, %xmm5 129; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 130; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 131; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 132; SSE41-NEXT: pand %xmm6, %xmm4 133; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3] 134; SSE41-NEXT: por %xmm4, %xmm0 135; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 136; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] 137; SSE41-NEXT: movdqa %xmm1, %xmm0 138; SSE41-NEXT: pxor %xmm3, %xmm0 139; SSE41-NEXT: pxor %xmm2, %xmm3 140; SSE41-NEXT: movdqa %xmm0, %xmm4 141; SSE41-NEXT: pcmpgtd %xmm3, %xmm4 142; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 143; SSE41-NEXT: pcmpeqd %xmm0, %xmm3 144; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 145; SSE41-NEXT: pand %xmm5, %xmm3 146; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] 147; SSE41-NEXT: por %xmm3, %xmm0 148; SSE41-NEXT: blendvpd %xmm0, %xmm1, 
%xmm2 149; SSE41-NEXT: movq %xmm2, %rax 150; SSE41-NEXT: retq 151; 152; AVX1-LABEL: test_v4i64: 153; AVX1: # %bb.0: 154; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 155; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 156; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 157; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 158; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 159; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 160; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 161; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 162; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3 163; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 164; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 165; AVX1-NEXT: vmovq %xmm0, %rax 166; AVX1-NEXT: vzeroupper 167; AVX1-NEXT: retq 168; 169; AVX2-LABEL: test_v4i64: 170; AVX2: # %bb.0: 171; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 172; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 173; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 174; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 175; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 176; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 177; AVX2-NEXT: vmovq %xmm0, %rax 178; AVX2-NEXT: vzeroupper 179; AVX2-NEXT: retq 180; 181; AVX512BW-LABEL: test_v4i64: 182; AVX512BW: # %bb.0: 183; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 184; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 185; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 186; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 187; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 188; AVX512BW-NEXT: vmovq %xmm0, %rax 189; AVX512BW-NEXT: vzeroupper 190; AVX512BW-NEXT: retq 191; 192; AVX512VL-LABEL: test_v4i64: 193; AVX512VL: # %bb.0: 194; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1 195; AVX512VL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 196; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 197; AVX512VL-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 198; AVX512VL-NEXT: vmovq %xmm0, %rax 199; AVX512VL-NEXT: vzeroupper 200; AVX512VL-NEXT: retq 201 %1 = call i64 
@llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64> %a0) 202 ret i64 %1 203} 204 205define i64 @test_v8i64(<8 x i64> %a0) { 206; SSE2-LABEL: test_v8i64: 207; SSE2: # %bb.0: 208; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0] 209; SSE2-NEXT: movdqa %xmm2, %xmm5 210; SSE2-NEXT: pxor %xmm4, %xmm5 211; SSE2-NEXT: movdqa %xmm0, %xmm6 212; SSE2-NEXT: pxor %xmm4, %xmm6 213; SSE2-NEXT: movdqa %xmm6, %xmm7 214; SSE2-NEXT: pcmpgtd %xmm5, %xmm7 215; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] 216; SSE2-NEXT: pcmpeqd %xmm5, %xmm6 217; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 218; SSE2-NEXT: pand %xmm8, %xmm6 219; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 220; SSE2-NEXT: por %xmm6, %xmm5 221; SSE2-NEXT: pand %xmm5, %xmm0 222; SSE2-NEXT: pandn %xmm2, %xmm5 223; SSE2-NEXT: por %xmm0, %xmm5 224; SSE2-NEXT: movdqa %xmm3, %xmm0 225; SSE2-NEXT: pxor %xmm4, %xmm0 226; SSE2-NEXT: movdqa %xmm1, %xmm2 227; SSE2-NEXT: pxor %xmm4, %xmm2 228; SSE2-NEXT: movdqa %xmm2, %xmm6 229; SSE2-NEXT: pcmpgtd %xmm0, %xmm6 230; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 231; SSE2-NEXT: pcmpeqd %xmm0, %xmm2 232; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 233; SSE2-NEXT: pand %xmm7, %xmm0 234; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3] 235; SSE2-NEXT: por %xmm0, %xmm2 236; SSE2-NEXT: pand %xmm2, %xmm1 237; SSE2-NEXT: pandn %xmm3, %xmm2 238; SSE2-NEXT: por %xmm1, %xmm2 239; SSE2-NEXT: movdqa %xmm2, %xmm0 240; SSE2-NEXT: pxor %xmm4, %xmm0 241; SSE2-NEXT: movdqa %xmm5, %xmm1 242; SSE2-NEXT: pxor %xmm4, %xmm1 243; SSE2-NEXT: movdqa %xmm1, %xmm3 244; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 245; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2] 246; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 247; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 248; SSE2-NEXT: pand %xmm6, %xmm0 249; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 250; SSE2-NEXT: por %xmm0, %xmm1 251; SSE2-NEXT: pand %xmm1, %xmm5 252; SSE2-NEXT: pandn %xmm2, %xmm1 253; SSE2-NEXT: por %xmm5, %xmm1 254; SSE2-NEXT: 
pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 255; SSE2-NEXT: movdqa %xmm1, %xmm2 256; SSE2-NEXT: pxor %xmm4, %xmm2 257; SSE2-NEXT: pxor %xmm0, %xmm4 258; SSE2-NEXT: movdqa %xmm2, %xmm3 259; SSE2-NEXT: pcmpgtd %xmm4, %xmm3 260; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 261; SSE2-NEXT: pcmpeqd %xmm2, %xmm4 262; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 263; SSE2-NEXT: pand %xmm5, %xmm2 264; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 265; SSE2-NEXT: por %xmm2, %xmm3 266; SSE2-NEXT: pand %xmm3, %xmm1 267; SSE2-NEXT: pandn %xmm0, %xmm3 268; SSE2-NEXT: por %xmm1, %xmm3 269; SSE2-NEXT: movq %xmm3, %rax 270; SSE2-NEXT: retq 271; 272; SSE41-LABEL: test_v8i64: 273; SSE41: # %bb.0: 274; SSE41-NEXT: movdqa %xmm0, %xmm4 275; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,0,2147483648,0] 276; SSE41-NEXT: movdqa %xmm2, %xmm0 277; SSE41-NEXT: pxor %xmm5, %xmm0 278; SSE41-NEXT: movdqa %xmm4, %xmm6 279; SSE41-NEXT: pxor %xmm5, %xmm6 280; SSE41-NEXT: movdqa %xmm6, %xmm7 281; SSE41-NEXT: pcmpgtd %xmm0, %xmm7 282; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] 283; SSE41-NEXT: pcmpeqd %xmm0, %xmm6 284; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 285; SSE41-NEXT: pand %xmm8, %xmm6 286; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3] 287; SSE41-NEXT: por %xmm6, %xmm0 288; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 289; SSE41-NEXT: movdqa %xmm3, %xmm0 290; SSE41-NEXT: pxor %xmm5, %xmm0 291; SSE41-NEXT: movdqa %xmm1, %xmm4 292; SSE41-NEXT: pxor %xmm5, %xmm4 293; SSE41-NEXT: movdqa %xmm4, %xmm6 294; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 295; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 296; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 297; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 298; SSE41-NEXT: pand %xmm7, %xmm4 299; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] 300; SSE41-NEXT: por %xmm4, %xmm0 301; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3 302; SSE41-NEXT: movapd %xmm3, %xmm0 303; SSE41-NEXT: xorpd %xmm5, %xmm0 304; SSE41-NEXT: movapd %xmm2, %xmm1 305; SSE41-NEXT: xorpd 
%xmm5, %xmm1 306; SSE41-NEXT: movapd %xmm1, %xmm4 307; SSE41-NEXT: pcmpgtd %xmm0, %xmm4 308; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2] 309; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 310; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 311; SSE41-NEXT: pand %xmm6, %xmm1 312; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] 313; SSE41-NEXT: por %xmm1, %xmm0 314; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm3 315; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1] 316; SSE41-NEXT: movdqa %xmm3, %xmm0 317; SSE41-NEXT: pxor %xmm5, %xmm0 318; SSE41-NEXT: pxor %xmm1, %xmm5 319; SSE41-NEXT: movdqa %xmm0, %xmm2 320; SSE41-NEXT: pcmpgtd %xmm5, %xmm2 321; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2] 322; SSE41-NEXT: pcmpeqd %xmm0, %xmm5 323; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 324; SSE41-NEXT: pand %xmm4, %xmm5 325; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 326; SSE41-NEXT: por %xmm5, %xmm0 327; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1 328; SSE41-NEXT: movq %xmm1, %rax 329; SSE41-NEXT: retq 330; 331; AVX1-LABEL: test_v8i64: 332; AVX1: # %bb.0: 333; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 334; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 335; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 336; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 337; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 338; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 339; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 340; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 341; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3 342; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 343; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 344; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 345; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 346; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 347; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3 348; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 349; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 350; AVX1-NEXT: vmovq %xmm0, %rax 351; AVX1-NEXT: vzeroupper 352; AVX1-NEXT: retq 353; 354; AVX2-LABEL: 
test_v8i64: 355; AVX2: # %bb.0: 356; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 357; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 358; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 359; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 360; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 361; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 362; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 363; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 364; AVX2-NEXT: vmovq %xmm0, %rax 365; AVX2-NEXT: vzeroupper 366; AVX2-NEXT: retq 367; 368; AVX512-LABEL: test_v8i64: 369; AVX512: # %bb.0: 370; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 371; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 372; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 373; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 374; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 375; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 376; AVX512-NEXT: vmovq %xmm0, %rax 377; AVX512-NEXT: vzeroupper 378; AVX512-NEXT: retq 379 %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64> %a0) 380 ret i64 %1 381} 382 383define i64 @test_v16i64(<16 x i64> %a0) { 384; SSE2-LABEL: test_v16i64: 385; SSE2: # %bb.0: 386; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0] 387; SSE2-NEXT: movdqa %xmm5, %xmm9 388; SSE2-NEXT: pxor %xmm8, %xmm9 389; SSE2-NEXT: movdqa %xmm1, %xmm10 390; SSE2-NEXT: pxor %xmm8, %xmm10 391; SSE2-NEXT: movdqa %xmm10, %xmm11 392; SSE2-NEXT: pcmpgtd %xmm9, %xmm11 393; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2] 394; SSE2-NEXT: pcmpeqd %xmm9, %xmm10 395; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3] 396; SSE2-NEXT: pand %xmm12, %xmm10 397; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm11[1,1,3,3] 398; SSE2-NEXT: por %xmm10, %xmm9 399; SSE2-NEXT: pand %xmm9, %xmm1 400; SSE2-NEXT: pandn %xmm5, %xmm9 401; SSE2-NEXT: por %xmm1, %xmm9 402; SSE2-NEXT: movdqa %xmm7, %xmm1 403; SSE2-NEXT: pxor %xmm8, %xmm1 404; SSE2-NEXT: movdqa %xmm3, %xmm5 405; SSE2-NEXT: pxor %xmm8, %xmm5 406; SSE2-NEXT: movdqa %xmm5, %xmm10 407; SSE2-NEXT: 
pcmpgtd %xmm1, %xmm10 408; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 409; SSE2-NEXT: pcmpeqd %xmm1, %xmm5 410; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 411; SSE2-NEXT: pand %xmm11, %xmm5 412; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm10[1,1,3,3] 413; SSE2-NEXT: por %xmm5, %xmm1 414; SSE2-NEXT: pand %xmm1, %xmm3 415; SSE2-NEXT: pandn %xmm7, %xmm1 416; SSE2-NEXT: por %xmm3, %xmm1 417; SSE2-NEXT: movdqa %xmm4, %xmm3 418; SSE2-NEXT: pxor %xmm8, %xmm3 419; SSE2-NEXT: movdqa %xmm0, %xmm5 420; SSE2-NEXT: pxor %xmm8, %xmm5 421; SSE2-NEXT: movdqa %xmm5, %xmm7 422; SSE2-NEXT: pcmpgtd %xmm3, %xmm7 423; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2] 424; SSE2-NEXT: pcmpeqd %xmm3, %xmm5 425; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 426; SSE2-NEXT: pand %xmm10, %xmm5 427; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm7[1,1,3,3] 428; SSE2-NEXT: por %xmm5, %xmm3 429; SSE2-NEXT: pand %xmm3, %xmm0 430; SSE2-NEXT: pandn %xmm4, %xmm3 431; SSE2-NEXT: por %xmm0, %xmm3 432; SSE2-NEXT: movdqa %xmm6, %xmm0 433; SSE2-NEXT: pxor %xmm8, %xmm0 434; SSE2-NEXT: movdqa %xmm2, %xmm4 435; SSE2-NEXT: pxor %xmm8, %xmm4 436; SSE2-NEXT: movdqa %xmm4, %xmm5 437; SSE2-NEXT: pcmpgtd %xmm0, %xmm5 438; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2] 439; SSE2-NEXT: pcmpeqd %xmm0, %xmm4 440; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3] 441; SSE2-NEXT: pand %xmm7, %xmm0 442; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] 443; SSE2-NEXT: por %xmm0, %xmm4 444; SSE2-NEXT: pand %xmm4, %xmm2 445; SSE2-NEXT: pandn %xmm6, %xmm4 446; SSE2-NEXT: por %xmm2, %xmm4 447; SSE2-NEXT: movdqa %xmm4, %xmm0 448; SSE2-NEXT: pxor %xmm8, %xmm0 449; SSE2-NEXT: movdqa %xmm3, %xmm2 450; SSE2-NEXT: pxor %xmm8, %xmm2 451; SSE2-NEXT: movdqa %xmm2, %xmm5 452; SSE2-NEXT: pcmpgtd %xmm0, %xmm5 453; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 454; SSE2-NEXT: pcmpeqd %xmm0, %xmm2 455; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 456; SSE2-NEXT: pand %xmm6, %xmm2 457; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3] 458; 
SSE2-NEXT: por %xmm2, %xmm0 459; SSE2-NEXT: pand %xmm0, %xmm3 460; SSE2-NEXT: pandn %xmm4, %xmm0 461; SSE2-NEXT: por %xmm3, %xmm0 462; SSE2-NEXT: movdqa %xmm1, %xmm2 463; SSE2-NEXT: pxor %xmm8, %xmm2 464; SSE2-NEXT: movdqa %xmm9, %xmm3 465; SSE2-NEXT: pxor %xmm8, %xmm3 466; SSE2-NEXT: movdqa %xmm3, %xmm4 467; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 468; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 469; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 470; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 471; SSE2-NEXT: pand %xmm5, %xmm2 472; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 473; SSE2-NEXT: por %xmm2, %xmm3 474; SSE2-NEXT: pand %xmm3, %xmm9 475; SSE2-NEXT: pandn %xmm1, %xmm3 476; SSE2-NEXT: por %xmm9, %xmm3 477; SSE2-NEXT: movdqa %xmm3, %xmm1 478; SSE2-NEXT: pxor %xmm8, %xmm1 479; SSE2-NEXT: movdqa %xmm0, %xmm2 480; SSE2-NEXT: pxor %xmm8, %xmm2 481; SSE2-NEXT: movdqa %xmm2, %xmm4 482; SSE2-NEXT: pcmpgtd %xmm1, %xmm4 483; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 484; SSE2-NEXT: pcmpeqd %xmm1, %xmm2 485; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 486; SSE2-NEXT: pand %xmm5, %xmm1 487; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 488; SSE2-NEXT: por %xmm1, %xmm2 489; SSE2-NEXT: pand %xmm2, %xmm0 490; SSE2-NEXT: pandn %xmm3, %xmm2 491; SSE2-NEXT: por %xmm0, %xmm2 492; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] 493; SSE2-NEXT: movdqa %xmm2, %xmm1 494; SSE2-NEXT: pxor %xmm8, %xmm1 495; SSE2-NEXT: pxor %xmm0, %xmm8 496; SSE2-NEXT: movdqa %xmm1, %xmm3 497; SSE2-NEXT: pcmpgtd %xmm8, %xmm3 498; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] 499; SSE2-NEXT: pcmpeqd %xmm1, %xmm8 500; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm8[1,1,3,3] 501; SSE2-NEXT: pand %xmm4, %xmm1 502; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 503; SSE2-NEXT: por %xmm1, %xmm3 504; SSE2-NEXT: pand %xmm3, %xmm2 505; SSE2-NEXT: pandn %xmm0, %xmm3 506; SSE2-NEXT: por %xmm2, %xmm3 507; SSE2-NEXT: movq %xmm3, %rax 508; SSE2-NEXT: retq 509; 510; SSE41-LABEL: test_v16i64: 511; SSE41: # %bb.0: 512; 
SSE41-NEXT: movdqa %xmm0, %xmm8 513; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,0,2147483648,0] 514; SSE41-NEXT: movdqa %xmm5, %xmm10 515; SSE41-NEXT: pxor %xmm9, %xmm10 516; SSE41-NEXT: movdqa %xmm1, %xmm0 517; SSE41-NEXT: pxor %xmm9, %xmm0 518; SSE41-NEXT: movdqa %xmm0, %xmm11 519; SSE41-NEXT: pcmpgtd %xmm10, %xmm11 520; SSE41-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2] 521; SSE41-NEXT: pcmpeqd %xmm10, %xmm0 522; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm0[1,1,3,3] 523; SSE41-NEXT: pand %xmm12, %xmm10 524; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm11[1,1,3,3] 525; SSE41-NEXT: por %xmm10, %xmm0 526; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5 527; SSE41-NEXT: movdqa %xmm7, %xmm0 528; SSE41-NEXT: pxor %xmm9, %xmm0 529; SSE41-NEXT: movdqa %xmm3, %xmm1 530; SSE41-NEXT: pxor %xmm9, %xmm1 531; SSE41-NEXT: movdqa %xmm1, %xmm10 532; SSE41-NEXT: pcmpgtd %xmm0, %xmm10 533; SSE41-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2] 534; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 535; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 536; SSE41-NEXT: pand %xmm11, %xmm1 537; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm10[1,1,3,3] 538; SSE41-NEXT: por %xmm1, %xmm0 539; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm7 540; SSE41-NEXT: movdqa %xmm4, %xmm0 541; SSE41-NEXT: pxor %xmm9, %xmm0 542; SSE41-NEXT: movdqa %xmm8, %xmm1 543; SSE41-NEXT: pxor %xmm9, %xmm1 544; SSE41-NEXT: movdqa %xmm1, %xmm3 545; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 546; SSE41-NEXT: pshufd {{.*#+}} xmm10 = xmm3[0,0,2,2] 547; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 548; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 549; SSE41-NEXT: pand %xmm10, %xmm1 550; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3] 551; SSE41-NEXT: por %xmm1, %xmm0 552; SSE41-NEXT: blendvpd %xmm0, %xmm8, %xmm4 553; SSE41-NEXT: movdqa %xmm6, %xmm0 554; SSE41-NEXT: pxor %xmm9, %xmm0 555; SSE41-NEXT: movdqa %xmm2, %xmm1 556; SSE41-NEXT: pxor %xmm9, %xmm1 557; SSE41-NEXT: movdqa %xmm1, %xmm3 558; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 559; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm3[0,0,2,2] 
560; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 561; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 562; SSE41-NEXT: pand %xmm8, %xmm1 563; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3] 564; SSE41-NEXT: por %xmm1, %xmm0 565; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm6 566; SSE41-NEXT: movapd %xmm6, %xmm0 567; SSE41-NEXT: xorpd %xmm9, %xmm0 568; SSE41-NEXT: movapd %xmm4, %xmm1 569; SSE41-NEXT: xorpd %xmm9, %xmm1 570; SSE41-NEXT: movapd %xmm1, %xmm2 571; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 572; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 573; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 574; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 575; SSE41-NEXT: pand %xmm3, %xmm1 576; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 577; SSE41-NEXT: por %xmm1, %xmm0 578; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm6 579; SSE41-NEXT: movapd %xmm7, %xmm0 580; SSE41-NEXT: xorpd %xmm9, %xmm0 581; SSE41-NEXT: movapd %xmm5, %xmm1 582; SSE41-NEXT: xorpd %xmm9, %xmm1 583; SSE41-NEXT: movapd %xmm1, %xmm2 584; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 585; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 586; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 587; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 588; SSE41-NEXT: pand %xmm3, %xmm1 589; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 590; SSE41-NEXT: por %xmm1, %xmm0 591; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm7 592; SSE41-NEXT: movapd %xmm7, %xmm0 593; SSE41-NEXT: xorpd %xmm9, %xmm0 594; SSE41-NEXT: movapd %xmm6, %xmm1 595; SSE41-NEXT: xorpd %xmm9, %xmm1 596; SSE41-NEXT: movapd %xmm1, %xmm2 597; SSE41-NEXT: pcmpgtd %xmm0, %xmm2 598; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 599; SSE41-NEXT: pcmpeqd %xmm0, %xmm1 600; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 601; SSE41-NEXT: pand %xmm3, %xmm1 602; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 603; SSE41-NEXT: por %xmm1, %xmm0 604; SSE41-NEXT: blendvpd %xmm0, %xmm6, %xmm7 605; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm7[2,3,0,1] 606; SSE41-NEXT: movdqa %xmm7, %xmm0 607; SSE41-NEXT: pxor %xmm9, %xmm0 608; 
SSE41-NEXT: pxor %xmm1, %xmm9 609; SSE41-NEXT: movdqa %xmm0, %xmm2 610; SSE41-NEXT: pcmpgtd %xmm9, %xmm2 611; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] 612; SSE41-NEXT: pcmpeqd %xmm0, %xmm9 613; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm9[1,1,3,3] 614; SSE41-NEXT: pand %xmm3, %xmm4 615; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 616; SSE41-NEXT: por %xmm4, %xmm0 617; SSE41-NEXT: blendvpd %xmm0, %xmm7, %xmm1 618; SSE41-NEXT: movq %xmm1, %rax 619; SSE41-NEXT: retq 620; 621; AVX1-LABEL: test_v16i64: 622; AVX1: # %bb.0: 623; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 624; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 625; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 626; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm5 627; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4 628; AVX1-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0 629; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2 630; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 631; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2 632; AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm4 633; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm4, %ymm2 634; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm3, %ymm1 635; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 636; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 637; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 638; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 639; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 640; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 641; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 642; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 643; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3 644; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 645; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 646; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 647; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 648; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 649; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3 650; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 651; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 652; AVX1-NEXT: vmovq %xmm0, %rax 653; AVX1-NEXT: vzeroupper 654; 
AVX1-NEXT: retq 655; 656; AVX2-LABEL: test_v16i64: 657; AVX2: # %bb.0: 658; AVX2-NEXT: vpcmpgtq %ymm3, %ymm1, %ymm4 659; AVX2-NEXT: vblendvpd %ymm4, %ymm1, %ymm3, %ymm1 660; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm3 661; AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm2, %ymm0 662; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 663; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 664; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 665; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 666; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 667; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] 668; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 669; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 670; AVX2-NEXT: vmovq %xmm0, %rax 671; AVX2-NEXT: vzeroupper 672; AVX2-NEXT: retq 673; 674; AVX512-LABEL: test_v16i64: 675; AVX512: # %bb.0: 676; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 677; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 678; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 679; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 680; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 681; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 682; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 683; AVX512-NEXT: vmovq %xmm0, %rax 684; AVX512-NEXT: vzeroupper 685; AVX512-NEXT: retq 686 %1 = call i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64> %a0) 687 ret i64 %1 688} 689 690; 691; vXi32 692; 693 694define i32 @test_v4i32(<4 x i32> %a0) { 695; SSE2-LABEL: test_v4i32: 696; SSE2: # %bb.0: 697; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 698; SSE2-NEXT: movdqa %xmm0, %xmm2 699; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 700; SSE2-NEXT: pand %xmm2, %xmm0 701; SSE2-NEXT: pandn %xmm1, %xmm2 702; SSE2-NEXT: por %xmm0, %xmm2 703; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 704; SSE2-NEXT: movdqa %xmm2, %xmm1 705; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 706; SSE2-NEXT: pand %xmm1, %xmm2 707; SSE2-NEXT: pandn %xmm0, %xmm1 708; SSE2-NEXT: por %xmm2, %xmm1 709; SSE2-NEXT: movd %xmm1, %eax 710; SSE2-NEXT: retq 711; 712; SSE41-LABEL: test_v4i32: 
713; SSE41: # %bb.0: 714; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 715; SSE41-NEXT: pmaxsd %xmm0, %xmm1 716; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 717; SSE41-NEXT: pmaxsd %xmm1, %xmm0 718; SSE41-NEXT: movd %xmm0, %eax 719; SSE41-NEXT: retq 720; 721; AVX-LABEL: test_v4i32: 722; AVX: # %bb.0: 723; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 724; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 725; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 726; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 727; AVX-NEXT: vmovd %xmm0, %eax 728; AVX-NEXT: retq 729; 730; AVX512-LABEL: test_v4i32: 731; AVX512: # %bb.0: 732; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 733; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 734; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 735; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 736; AVX512-NEXT: vmovd %xmm0, %eax 737; AVX512-NEXT: retq 738 %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %a0) 739 ret i32 %1 740} 741 742define i32 @test_v8i32(<8 x i32> %a0) { 743; SSE2-LABEL: test_v8i32: 744; SSE2: # %bb.0: 745; SSE2-NEXT: movdqa %xmm0, %xmm2 746; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 747; SSE2-NEXT: pand %xmm2, %xmm0 748; SSE2-NEXT: pandn %xmm1, %xmm2 749; SSE2-NEXT: por %xmm0, %xmm2 750; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] 751; SSE2-NEXT: movdqa %xmm2, %xmm1 752; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 753; SSE2-NEXT: pand %xmm1, %xmm2 754; SSE2-NEXT: pandn %xmm0, %xmm1 755; SSE2-NEXT: por %xmm2, %xmm1 756; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 757; SSE2-NEXT: movdqa %xmm1, %xmm2 758; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 759; SSE2-NEXT: pand %xmm2, %xmm1 760; SSE2-NEXT: pandn %xmm0, %xmm2 761; SSE2-NEXT: por %xmm1, %xmm2 762; SSE2-NEXT: movd %xmm2, %eax 763; SSE2-NEXT: retq 764; 765; SSE41-LABEL: test_v8i32: 766; SSE41: # %bb.0: 767; SSE41-NEXT: pmaxsd %xmm1, %xmm0 768; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 769; SSE41-NEXT: pmaxsd %xmm0, %xmm1 770; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 
771; SSE41-NEXT: pmaxsd %xmm1, %xmm0 772; SSE41-NEXT: movd %xmm0, %eax 773; SSE41-NEXT: retq 774; 775; AVX1-LABEL: test_v8i32: 776; AVX1: # %bb.0: 777; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 778; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 779; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 780; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 781; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 782; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 783; AVX1-NEXT: vmovd %xmm0, %eax 784; AVX1-NEXT: vzeroupper 785; AVX1-NEXT: retq 786; 787; AVX2-LABEL: test_v8i32: 788; AVX2: # %bb.0: 789; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 790; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 791; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 792; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 793; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 794; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 795; AVX2-NEXT: vmovd %xmm0, %eax 796; AVX2-NEXT: vzeroupper 797; AVX2-NEXT: retq 798; 799; AVX512-LABEL: test_v8i32: 800; AVX512: # %bb.0: 801; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 802; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 803; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 804; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 805; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 806; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 807; AVX512-NEXT: vmovd %xmm0, %eax 808; AVX512-NEXT: vzeroupper 809; AVX512-NEXT: retq 810 %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32> %a0) 811 ret i32 %1 812} 813 814define i32 @test_v16i32(<16 x i32> %a0) { 815; SSE2-LABEL: test_v16i32: 816; SSE2: # %bb.0: 817; SSE2-NEXT: movdqa %xmm1, %xmm4 818; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 819; SSE2-NEXT: pand %xmm4, %xmm1 820; SSE2-NEXT: pandn %xmm3, %xmm4 821; SSE2-NEXT: por %xmm1, %xmm4 822; SSE2-NEXT: movdqa %xmm0, %xmm1 823; SSE2-NEXT: pcmpgtd %xmm2, %xmm1 824; SSE2-NEXT: pand %xmm1, %xmm0 825; SSE2-NEXT: pandn %xmm2, %xmm1 826; SSE2-NEXT: por %xmm0, %xmm1 827; SSE2-NEXT: movdqa %xmm1, %xmm0 828; SSE2-NEXT: pcmpgtd %xmm4, 
%xmm0 829; SSE2-NEXT: pand %xmm0, %xmm1 830; SSE2-NEXT: pandn %xmm4, %xmm0 831; SSE2-NEXT: por %xmm1, %xmm0 832; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 833; SSE2-NEXT: movdqa %xmm0, %xmm2 834; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 835; SSE2-NEXT: pand %xmm2, %xmm0 836; SSE2-NEXT: pandn %xmm1, %xmm2 837; SSE2-NEXT: por %xmm0, %xmm2 838; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 839; SSE2-NEXT: movdqa %xmm2, %xmm1 840; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 841; SSE2-NEXT: pand %xmm1, %xmm2 842; SSE2-NEXT: pandn %xmm0, %xmm1 843; SSE2-NEXT: por %xmm2, %xmm1 844; SSE2-NEXT: movd %xmm1, %eax 845; SSE2-NEXT: retq 846; 847; SSE41-LABEL: test_v16i32: 848; SSE41: # %bb.0: 849; SSE41-NEXT: pmaxsd %xmm3, %xmm1 850; SSE41-NEXT: pmaxsd %xmm2, %xmm0 851; SSE41-NEXT: pmaxsd %xmm1, %xmm0 852; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 853; SSE41-NEXT: pmaxsd %xmm0, %xmm1 854; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 855; SSE41-NEXT: pmaxsd %xmm1, %xmm0 856; SSE41-NEXT: movd %xmm0, %eax 857; SSE41-NEXT: retq 858; 859; AVX1-LABEL: test_v16i32: 860; AVX1: # %bb.0: 861; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 862; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 863; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2 864; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 865; AVX1-NEXT: vpmaxsd %xmm2, %xmm0, %xmm0 866; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 867; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 868; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 869; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 870; AVX1-NEXT: vmovd %xmm0, %eax 871; AVX1-NEXT: vzeroupper 872; AVX1-NEXT: retq 873; 874; AVX2-LABEL: test_v16i32: 875; AVX2: # %bb.0: 876; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 877; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 878; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 879; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 880; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 881; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 882; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 883; AVX2-NEXT: vmovd %xmm0, %eax 
884; AVX2-NEXT: vzeroupper 885; AVX2-NEXT: retq 886; 887; AVX512-LABEL: test_v16i32: 888; AVX512: # %bb.0: 889; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 890; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 891; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 892; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 893; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 894; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 895; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 896; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 897; AVX512-NEXT: vmovd %xmm0, %eax 898; AVX512-NEXT: vzeroupper 899; AVX512-NEXT: retq 900 %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32> %a0) 901 ret i32 %1 902} 903 904define i32 @test_v32i32(<32 x i32> %a0) { 905; SSE2-LABEL: test_v32i32: 906; SSE2: # %bb.0: 907; SSE2-NEXT: movdqa %xmm2, %xmm8 908; SSE2-NEXT: pcmpgtd %xmm6, %xmm8 909; SSE2-NEXT: pand %xmm8, %xmm2 910; SSE2-NEXT: pandn %xmm6, %xmm8 911; SSE2-NEXT: por %xmm2, %xmm8 912; SSE2-NEXT: movdqa %xmm0, %xmm2 913; SSE2-NEXT: pcmpgtd %xmm4, %xmm2 914; SSE2-NEXT: pand %xmm2, %xmm0 915; SSE2-NEXT: pandn %xmm4, %xmm2 916; SSE2-NEXT: por %xmm0, %xmm2 917; SSE2-NEXT: movdqa %xmm3, %xmm0 918; SSE2-NEXT: pcmpgtd %xmm7, %xmm0 919; SSE2-NEXT: pand %xmm0, %xmm3 920; SSE2-NEXT: pandn %xmm7, %xmm0 921; SSE2-NEXT: por %xmm3, %xmm0 922; SSE2-NEXT: movdqa %xmm1, %xmm3 923; SSE2-NEXT: pcmpgtd %xmm5, %xmm3 924; SSE2-NEXT: pand %xmm3, %xmm1 925; SSE2-NEXT: pandn %xmm5, %xmm3 926; SSE2-NEXT: por %xmm1, %xmm3 927; SSE2-NEXT: movdqa %xmm3, %xmm1 928; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 929; SSE2-NEXT: pand %xmm1, %xmm3 930; SSE2-NEXT: pandn %xmm0, %xmm1 931; SSE2-NEXT: por %xmm3, %xmm1 932; SSE2-NEXT: movdqa %xmm2, %xmm0 933; SSE2-NEXT: pcmpgtd %xmm8, %xmm0 934; SSE2-NEXT: pand %xmm0, %xmm2 935; SSE2-NEXT: pandn %xmm8, %xmm0 936; SSE2-NEXT: por %xmm2, %xmm0 937; SSE2-NEXT: movdqa %xmm0, %xmm2 938; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 939; SSE2-NEXT: pand %xmm2, %xmm0 940; SSE2-NEXT: pandn %xmm1, %xmm2 941; SSE2-NEXT: 
por %xmm0, %xmm2 942; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] 943; SSE2-NEXT: movdqa %xmm2, %xmm1 944; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 945; SSE2-NEXT: pand %xmm1, %xmm2 946; SSE2-NEXT: pandn %xmm0, %xmm1 947; SSE2-NEXT: por %xmm2, %xmm1 948; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 949; SSE2-NEXT: movdqa %xmm1, %xmm2 950; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 951; SSE2-NEXT: pand %xmm2, %xmm1 952; SSE2-NEXT: pandn %xmm0, %xmm2 953; SSE2-NEXT: por %xmm1, %xmm2 954; SSE2-NEXT: movd %xmm2, %eax 955; SSE2-NEXT: retq 956; 957; SSE41-LABEL: test_v32i32: 958; SSE41: # %bb.0: 959; SSE41-NEXT: pmaxsd %xmm6, %xmm2 960; SSE41-NEXT: pmaxsd %xmm4, %xmm0 961; SSE41-NEXT: pmaxsd %xmm2, %xmm0 962; SSE41-NEXT: pmaxsd %xmm7, %xmm3 963; SSE41-NEXT: pmaxsd %xmm5, %xmm1 964; SSE41-NEXT: pmaxsd %xmm3, %xmm1 965; SSE41-NEXT: pmaxsd %xmm0, %xmm1 966; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 967; SSE41-NEXT: pmaxsd %xmm1, %xmm0 968; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 969; SSE41-NEXT: pmaxsd %xmm0, %xmm1 970; SSE41-NEXT: movd %xmm1, %eax 971; SSE41-NEXT: retq 972; 973; AVX1-LABEL: test_v32i32: 974; AVX1: # %bb.0: 975; AVX1-NEXT: vpmaxsd %xmm3, %xmm1, %xmm4 976; AVX1-NEXT: vpmaxsd %xmm2, %xmm0, %xmm5 977; AVX1-NEXT: vpmaxsd %xmm4, %xmm5, %xmm4 978; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 979; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 980; AVX1-NEXT: vpmaxsd %xmm3, %xmm1, %xmm1 981; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 982; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 983; AVX1-NEXT: vpmaxsd %xmm2, %xmm0, %xmm0 984; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 985; AVX1-NEXT: vpmaxsd %xmm0, %xmm4, %xmm0 986; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 987; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 988; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 989; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 990; AVX1-NEXT: vmovd %xmm0, %eax 991; AVX1-NEXT: vzeroupper 992; AVX1-NEXT: retq 993; 994; AVX2-LABEL: test_v32i32: 995; AVX2: # %bb.0: 996; AVX2-NEXT: vpmaxsd %ymm3, %ymm1, 
%ymm1 997; AVX2-NEXT: vpmaxsd %ymm2, %ymm0, %ymm0 998; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 999; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1000; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 1001; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1002; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 1003; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1004; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 1005; AVX2-NEXT: vmovd %xmm0, %eax 1006; AVX2-NEXT: vzeroupper 1007; AVX2-NEXT: retq 1008; 1009; AVX512-LABEL: test_v32i32: 1010; AVX512: # %bb.0: 1011; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 1012; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1013; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 1014; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1015; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 1016; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1017; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 1018; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1019; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 1020; AVX512-NEXT: vmovd %xmm0, %eax 1021; AVX512-NEXT: vzeroupper 1022; AVX512-NEXT: retq 1023 %1 = call i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32> %a0) 1024 ret i32 %1 1025} 1026 1027; 1028; vXi16 1029; 1030 1031define i16 @test_v8i16(<8 x i16> %a0) { 1032; SSE2-LABEL: test_v8i16: 1033; SSE2: # %bb.0: 1034; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1035; SSE2-NEXT: pmaxsw %xmm0, %xmm1 1036; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 1037; SSE2-NEXT: pmaxsw %xmm1, %xmm0 1038; SSE2-NEXT: movdqa %xmm0, %xmm1 1039; SSE2-NEXT: psrld $16, %xmm1 1040; SSE2-NEXT: pmaxsw %xmm0, %xmm1 1041; SSE2-NEXT: movd %xmm1, %eax 1042; SSE2-NEXT: # kill: def $ax killed $ax killed $eax 1043; SSE2-NEXT: retq 1044; 1045; SSE41-LABEL: test_v8i16: 1046; SSE41: # %bb.0: 1047; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1048; SSE41-NEXT: pxor %xmm1, %xmm0 1049; SSE41-NEXT: phminposuw %xmm0, %xmm0 1050; SSE41-NEXT: pxor %xmm1, %xmm0 1051; SSE41-NEXT: movd 
%xmm0, %eax 1052; SSE41-NEXT: # kill: def $ax killed $ax killed $eax 1053; SSE41-NEXT: retq 1054; 1055; AVX-LABEL: test_v8i16: 1056; AVX: # %bb.0: 1057; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1058; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 1059; AVX-NEXT: vphminposuw %xmm0, %xmm0 1060; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 1061; AVX-NEXT: vmovd %xmm0, %eax 1062; AVX-NEXT: # kill: def $ax killed $ax killed $eax 1063; AVX-NEXT: retq 1064; 1065; AVX512-LABEL: test_v8i16: 1066; AVX512: # %bb.0: 1067; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1068; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1069; AVX512-NEXT: vphminposuw %xmm0, %xmm0 1070; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1071; AVX512-NEXT: vmovd %xmm0, %eax 1072; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 1073; AVX512-NEXT: retq 1074 %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %a0) 1075 ret i16 %1 1076} 1077 1078define i16 @test_v16i16(<16 x i16> %a0) { 1079; SSE2-LABEL: test_v16i16: 1080; SSE2: # %bb.0: 1081; SSE2-NEXT: pmaxsw %xmm1, %xmm0 1082; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1083; SSE2-NEXT: pmaxsw %xmm0, %xmm1 1084; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 1085; SSE2-NEXT: pmaxsw %xmm1, %xmm0 1086; SSE2-NEXT: movdqa %xmm0, %xmm1 1087; SSE2-NEXT: psrld $16, %xmm1 1088; SSE2-NEXT: pmaxsw %xmm0, %xmm1 1089; SSE2-NEXT: movd %xmm1, %eax 1090; SSE2-NEXT: # kill: def $ax killed $ax killed $eax 1091; SSE2-NEXT: retq 1092; 1093; SSE41-LABEL: test_v16i16: 1094; SSE41: # %bb.0: 1095; SSE41-NEXT: pmaxsw %xmm1, %xmm0 1096; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1097; SSE41-NEXT: pxor %xmm1, %xmm0 1098; SSE41-NEXT: phminposuw %xmm0, %xmm0 1099; SSE41-NEXT: pxor %xmm1, %xmm0 1100; SSE41-NEXT: movd %xmm0, %eax 1101; SSE41-NEXT: # kill: def $ax killed $ax killed $eax 1102; SSE41-NEXT: retq 1103; 1104; AVX1-LABEL: test_v16i16: 1105; AVX1: # 
%bb.0: 1106; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1107; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1108; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1109; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1110; AVX1-NEXT: vphminposuw %xmm0, %xmm0 1111; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1112; AVX1-NEXT: vmovd %xmm0, %eax 1113; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 1114; AVX1-NEXT: vzeroupper 1115; AVX1-NEXT: retq 1116; 1117; AVX2-LABEL: test_v16i16: 1118; AVX2: # %bb.0: 1119; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1120; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1121; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1122; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1123; AVX2-NEXT: vphminposuw %xmm0, %xmm0 1124; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1125; AVX2-NEXT: vmovd %xmm0, %eax 1126; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 1127; AVX2-NEXT: vzeroupper 1128; AVX2-NEXT: retq 1129; 1130; AVX512-LABEL: test_v16i16: 1131; AVX512: # %bb.0: 1132; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1133; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1134; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1135; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1136; AVX512-NEXT: vphminposuw %xmm0, %xmm0 1137; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1138; AVX512-NEXT: vmovd %xmm0, %eax 1139; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 1140; AVX512-NEXT: vzeroupper 1141; AVX512-NEXT: retq 1142 %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16> %a0) 1143 ret i16 %1 1144} 1145 1146define i16 @test_v32i16(<32 x i16> %a0) { 1147; SSE2-LABEL: test_v32i16: 1148; SSE2: # %bb.0: 1149; SSE2-NEXT: pmaxsw %xmm3, %xmm1 1150; SSE2-NEXT: pmaxsw %xmm2, %xmm0 1151; SSE2-NEXT: pmaxsw %xmm1, %xmm0 1152; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1153; SSE2-NEXT: pmaxsw %xmm0, %xmm1 1154; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 1155; SSE2-NEXT: pmaxsw %xmm1, 
%xmm0 1156; SSE2-NEXT: movdqa %xmm0, %xmm1 1157; SSE2-NEXT: psrld $16, %xmm1 1158; SSE2-NEXT: pmaxsw %xmm0, %xmm1 1159; SSE2-NEXT: movd %xmm1, %eax 1160; SSE2-NEXT: # kill: def $ax killed $ax killed $eax 1161; SSE2-NEXT: retq 1162; 1163; SSE41-LABEL: test_v32i16: 1164; SSE41: # %bb.0: 1165; SSE41-NEXT: pmaxsw %xmm3, %xmm1 1166; SSE41-NEXT: pmaxsw %xmm2, %xmm0 1167; SSE41-NEXT: pmaxsw %xmm1, %xmm0 1168; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1169; SSE41-NEXT: pxor %xmm1, %xmm0 1170; SSE41-NEXT: phminposuw %xmm0, %xmm0 1171; SSE41-NEXT: pxor %xmm1, %xmm0 1172; SSE41-NEXT: movd %xmm0, %eax 1173; SSE41-NEXT: # kill: def $ax killed $ax killed $eax 1174; SSE41-NEXT: retq 1175; 1176; AVX1-LABEL: test_v32i16: 1177; AVX1: # %bb.0: 1178; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1179; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1180; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2 1181; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1182; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0 1183; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1184; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1185; AVX1-NEXT: vphminposuw %xmm0, %xmm0 1186; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1187; AVX1-NEXT: vmovd %xmm0, %eax 1188; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 1189; AVX1-NEXT: vzeroupper 1190; AVX1-NEXT: retq 1191; 1192; AVX2-LABEL: test_v32i16: 1193; AVX2: # %bb.0: 1194; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 1195; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1196; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1197; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1198; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1199; AVX2-NEXT: vphminposuw %xmm0, %xmm0 1200; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1201; AVX2-NEXT: vmovd %xmm0, %eax 1202; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 1203; AVX2-NEXT: vzeroupper 1204; AVX2-NEXT: retq 1205; 1206; AVX512-LABEL: test_v32i16: 1207; AVX512: # %bb.0: 1208; 
AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1209; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 1210; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1211; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1212; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1213; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1214; AVX512-NEXT: vphminposuw %xmm0, %xmm0 1215; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1216; AVX512-NEXT: vmovd %xmm0, %eax 1217; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 1218; AVX512-NEXT: vzeroupper 1219; AVX512-NEXT: retq 1220 %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16> %a0) 1221 ret i16 %1 1222} 1223 1224define i16 @test_v64i16(<64 x i16> %a0) { 1225; SSE2-LABEL: test_v64i16: 1226; SSE2: # %bb.0: 1227; SSE2-NEXT: pmaxsw %xmm6, %xmm2 1228; SSE2-NEXT: pmaxsw %xmm4, %xmm0 1229; SSE2-NEXT: pmaxsw %xmm2, %xmm0 1230; SSE2-NEXT: pmaxsw %xmm7, %xmm3 1231; SSE2-NEXT: pmaxsw %xmm5, %xmm1 1232; SSE2-NEXT: pmaxsw %xmm3, %xmm1 1233; SSE2-NEXT: pmaxsw %xmm0, %xmm1 1234; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 1235; SSE2-NEXT: pmaxsw %xmm1, %xmm0 1236; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1237; SSE2-NEXT: pmaxsw %xmm0, %xmm1 1238; SSE2-NEXT: movdqa %xmm1, %xmm0 1239; SSE2-NEXT: psrld $16, %xmm0 1240; SSE2-NEXT: pmaxsw %xmm1, %xmm0 1241; SSE2-NEXT: movd %xmm0, %eax 1242; SSE2-NEXT: # kill: def $ax killed $ax killed $eax 1243; SSE2-NEXT: retq 1244; 1245; SSE41-LABEL: test_v64i16: 1246; SSE41: # %bb.0: 1247; SSE41-NEXT: pmaxsw %xmm7, %xmm3 1248; SSE41-NEXT: pmaxsw %xmm5, %xmm1 1249; SSE41-NEXT: pmaxsw %xmm3, %xmm1 1250; SSE41-NEXT: pmaxsw %xmm6, %xmm2 1251; SSE41-NEXT: pmaxsw %xmm4, %xmm0 1252; SSE41-NEXT: pmaxsw %xmm2, %xmm0 1253; SSE41-NEXT: pmaxsw %xmm1, %xmm0 1254; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1255; SSE41-NEXT: pxor %xmm1, %xmm0 1256; SSE41-NEXT: phminposuw %xmm0, %xmm0 1257; SSE41-NEXT: pxor %xmm1, %xmm0 1258; SSE41-NEXT: movd %xmm0, %eax 
1259; SSE41-NEXT: # kill: def $ax killed $ax killed $eax 1260; SSE41-NEXT: retq 1261; 1262; AVX1-LABEL: test_v64i16: 1263; AVX1: # %bb.0: 1264; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 1265; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 1266; AVX1-NEXT: vpmaxsw %xmm4, %xmm5, %xmm4 1267; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 1268; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 1269; AVX1-NEXT: vpmaxsw %xmm5, %xmm6, %xmm5 1270; AVX1-NEXT: vpmaxsw %xmm4, %xmm5, %xmm4 1271; AVX1-NEXT: vpmaxsw %xmm3, %xmm1, %xmm1 1272; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0 1273; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1274; AVX1-NEXT: vpmaxsw %xmm4, %xmm0, %xmm0 1275; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1276; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1277; AVX1-NEXT: vphminposuw %xmm0, %xmm0 1278; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1279; AVX1-NEXT: vmovd %xmm0, %eax 1280; AVX1-NEXT: # kill: def $ax killed $ax killed $eax 1281; AVX1-NEXT: vzeroupper 1282; AVX1-NEXT: retq 1283; 1284; AVX2-LABEL: test_v64i16: 1285; AVX2: # %bb.0: 1286; AVX2-NEXT: vpmaxsw %ymm3, %ymm1, %ymm1 1287; AVX2-NEXT: vpmaxsw %ymm2, %ymm0, %ymm0 1288; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 1289; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1290; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1291; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] 1292; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1293; AVX2-NEXT: vphminposuw %xmm0, %xmm0 1294; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1295; AVX2-NEXT: vmovd %xmm0, %eax 1296; AVX2-NEXT: # kill: def $ax killed $ax killed $eax 1297; AVX2-NEXT: vzeroupper 1298; AVX2-NEXT: retq 1299; 1300; AVX512-LABEL: test_v64i16: 1301; AVX512: # %bb.0: 1302; AVX512-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0 1303; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1304; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 1305; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1306; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 1307; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = 
[32767,32767,32767,32767,32767,32767,32767,32767] 1308; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1309; AVX512-NEXT: vphminposuw %xmm0, %xmm0 1310; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1311; AVX512-NEXT: vmovd %xmm0, %eax 1312; AVX512-NEXT: # kill: def $ax killed $ax killed $eax 1313; AVX512-NEXT: vzeroupper 1314; AVX512-NEXT: retq 1315 %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16> %a0) 1316 ret i16 %1 1317} 1318 1319; 1320; vXi8 1321; 1322 1323define i8 @test_v16i8(<16 x i8> %a0) { 1324; SSE2-LABEL: test_v16i8: 1325; SSE2: # %bb.0: 1326; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1327; SSE2-NEXT: movdqa %xmm0, %xmm2 1328; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1329; SSE2-NEXT: pand %xmm2, %xmm0 1330; SSE2-NEXT: pandn %xmm1, %xmm2 1331; SSE2-NEXT: por %xmm0, %xmm2 1332; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 1333; SSE2-NEXT: movdqa %xmm2, %xmm1 1334; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1335; SSE2-NEXT: pand %xmm1, %xmm2 1336; SSE2-NEXT: pandn %xmm0, %xmm1 1337; SSE2-NEXT: por %xmm2, %xmm1 1338; SSE2-NEXT: movdqa %xmm1, %xmm0 1339; SSE2-NEXT: psrld $16, %xmm0 1340; SSE2-NEXT: movdqa %xmm1, %xmm2 1341; SSE2-NEXT: pcmpgtb %xmm0, %xmm2 1342; SSE2-NEXT: pand %xmm2, %xmm1 1343; SSE2-NEXT: pandn %xmm0, %xmm2 1344; SSE2-NEXT: por %xmm1, %xmm2 1345; SSE2-NEXT: movdqa %xmm2, %xmm0 1346; SSE2-NEXT: psrlw $8, %xmm0 1347; SSE2-NEXT: movdqa %xmm2, %xmm1 1348; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1349; SSE2-NEXT: pand %xmm1, %xmm2 1350; SSE2-NEXT: pandn %xmm0, %xmm1 1351; SSE2-NEXT: por %xmm2, %xmm1 1352; SSE2-NEXT: movd %xmm1, %eax 1353; SSE2-NEXT: # kill: def $al killed $al killed $eax 1354; SSE2-NEXT: retq 1355; 1356; SSE41-LABEL: test_v16i8: 1357; SSE41: # %bb.0: 1358; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1359; SSE41-NEXT: pxor %xmm1, %xmm0 1360; SSE41-NEXT: movdqa %xmm0, %xmm2 1361; SSE41-NEXT: psrlw $8, %xmm2 1362; SSE41-NEXT: pminub %xmm0, %xmm2 1363; SSE41-NEXT: phminposuw %xmm2, 
%xmm0 1364; SSE41-NEXT: pxor %xmm1, %xmm0 1365; SSE41-NEXT: pextrb $0, %xmm0, %eax 1366; SSE41-NEXT: # kill: def $al killed $al killed $eax 1367; SSE41-NEXT: retq 1368; 1369; AVX-LABEL: test_v16i8: 1370; AVX: # %bb.0: 1371; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1372; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 1373; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2 1374; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0 1375; AVX-NEXT: vphminposuw %xmm0, %xmm0 1376; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 1377; AVX-NEXT: vpextrb $0, %xmm0, %eax 1378; AVX-NEXT: # kill: def $al killed $al killed $eax 1379; AVX-NEXT: retq 1380; 1381; AVX512-LABEL: test_v16i8: 1382; AVX512: # %bb.0: 1383; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1384; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1385; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 1386; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 1387; AVX512-NEXT: vphminposuw %xmm0, %xmm0 1388; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1389; AVX512-NEXT: vpextrb $0, %xmm0, %eax 1390; AVX512-NEXT: # kill: def $al killed $al killed $eax 1391; AVX512-NEXT: retq 1392 %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %a0) 1393 ret i8 %1 1394} 1395 1396define i8 @test_v32i8(<32 x i8> %a0) { 1397; SSE2-LABEL: test_v32i8: 1398; SSE2: # %bb.0: 1399; SSE2-NEXT: movdqa %xmm0, %xmm2 1400; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1401; SSE2-NEXT: pand %xmm2, %xmm0 1402; SSE2-NEXT: pandn %xmm1, %xmm2 1403; SSE2-NEXT: por %xmm0, %xmm2 1404; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] 1405; SSE2-NEXT: movdqa %xmm2, %xmm1 1406; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1407; SSE2-NEXT: pand %xmm1, %xmm2 1408; SSE2-NEXT: pandn %xmm0, %xmm1 1409; SSE2-NEXT: por %xmm2, %xmm1 1410; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 1411; SSE2-NEXT: movdqa %xmm1, %xmm2 1412; SSE2-NEXT: pcmpgtb %xmm0, %xmm2 1413; SSE2-NEXT: pand %xmm2, %xmm1 1414; SSE2-NEXT: pandn %xmm0, %xmm2 1415; SSE2-NEXT: 
por %xmm1, %xmm2 1416; SSE2-NEXT: movdqa %xmm2, %xmm0 1417; SSE2-NEXT: psrld $16, %xmm0 1418; SSE2-NEXT: movdqa %xmm2, %xmm1 1419; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1420; SSE2-NEXT: pand %xmm1, %xmm2 1421; SSE2-NEXT: pandn %xmm0, %xmm1 1422; SSE2-NEXT: por %xmm2, %xmm1 1423; SSE2-NEXT: movdqa %xmm1, %xmm0 1424; SSE2-NEXT: psrlw $8, %xmm0 1425; SSE2-NEXT: movdqa %xmm1, %xmm2 1426; SSE2-NEXT: pcmpgtb %xmm0, %xmm2 1427; SSE2-NEXT: pand %xmm2, %xmm1 1428; SSE2-NEXT: pandn %xmm0, %xmm2 1429; SSE2-NEXT: por %xmm1, %xmm2 1430; SSE2-NEXT: movd %xmm2, %eax 1431; SSE2-NEXT: # kill: def $al killed $al killed $eax 1432; SSE2-NEXT: retq 1433; 1434; SSE41-LABEL: test_v32i8: 1435; SSE41: # %bb.0: 1436; SSE41-NEXT: pmaxsb %xmm1, %xmm0 1437; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1438; SSE41-NEXT: pxor %xmm1, %xmm0 1439; SSE41-NEXT: movdqa %xmm0, %xmm2 1440; SSE41-NEXT: psrlw $8, %xmm2 1441; SSE41-NEXT: pminub %xmm0, %xmm2 1442; SSE41-NEXT: phminposuw %xmm2, %xmm0 1443; SSE41-NEXT: pxor %xmm1, %xmm0 1444; SSE41-NEXT: pextrb $0, %xmm0, %eax 1445; SSE41-NEXT: # kill: def $al killed $al killed $eax 1446; SSE41-NEXT: retq 1447; 1448; AVX1-LABEL: test_v32i8: 1449; AVX1: # %bb.0: 1450; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1451; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1452; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1453; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1454; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 1455; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 1456; AVX1-NEXT: vphminposuw %xmm0, %xmm0 1457; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1458; AVX1-NEXT: vpextrb $0, %xmm0, %eax 1459; AVX1-NEXT: # kill: def $al killed $al killed $eax 1460; AVX1-NEXT: vzeroupper 1461; AVX1-NEXT: retq 1462; 1463; AVX2-LABEL: test_v32i8: 1464; AVX2: # %bb.0: 1465; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1466; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1467; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = 
[127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1468; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1469; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 1470; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 1471; AVX2-NEXT: vphminposuw %xmm0, %xmm0 1472; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1473; AVX2-NEXT: vpextrb $0, %xmm0, %eax 1474; AVX2-NEXT: # kill: def $al killed $al killed $eax 1475; AVX2-NEXT: vzeroupper 1476; AVX2-NEXT: retq 1477; 1478; AVX512-LABEL: test_v32i8: 1479; AVX512: # %bb.0: 1480; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1481; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1482; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1483; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1484; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 1485; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 1486; AVX512-NEXT: vphminposuw %xmm0, %xmm0 1487; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1488; AVX512-NEXT: vpextrb $0, %xmm0, %eax 1489; AVX512-NEXT: # kill: def $al killed $al killed $eax 1490; AVX512-NEXT: vzeroupper 1491; AVX512-NEXT: retq 1492 %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8> %a0) 1493 ret i8 %1 1494} 1495 1496define i8 @test_v64i8(<64 x i8> %a0) { 1497; SSE2-LABEL: test_v64i8: 1498; SSE2: # %bb.0: 1499; SSE2-NEXT: movdqa %xmm1, %xmm4 1500; SSE2-NEXT: pcmpgtb %xmm3, %xmm4 1501; SSE2-NEXT: pand %xmm4, %xmm1 1502; SSE2-NEXT: pandn %xmm3, %xmm4 1503; SSE2-NEXT: por %xmm1, %xmm4 1504; SSE2-NEXT: movdqa %xmm0, %xmm1 1505; SSE2-NEXT: pcmpgtb %xmm2, %xmm1 1506; SSE2-NEXT: pand %xmm1, %xmm0 1507; SSE2-NEXT: pandn %xmm2, %xmm1 1508; SSE2-NEXT: por %xmm0, %xmm1 1509; SSE2-NEXT: movdqa %xmm1, %xmm0 1510; SSE2-NEXT: pcmpgtb %xmm4, %xmm0 1511; SSE2-NEXT: pand %xmm0, %xmm1 1512; SSE2-NEXT: pandn %xmm4, %xmm0 1513; SSE2-NEXT: por %xmm1, %xmm0 1514; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1515; SSE2-NEXT: movdqa %xmm0, %xmm2 1516; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1517; SSE2-NEXT: pand %xmm2, %xmm0 1518; SSE2-NEXT: pandn 
%xmm1, %xmm2 1519; SSE2-NEXT: por %xmm0, %xmm2 1520; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 1521; SSE2-NEXT: movdqa %xmm2, %xmm1 1522; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1523; SSE2-NEXT: pand %xmm1, %xmm2 1524; SSE2-NEXT: pandn %xmm0, %xmm1 1525; SSE2-NEXT: por %xmm2, %xmm1 1526; SSE2-NEXT: movdqa %xmm1, %xmm0 1527; SSE2-NEXT: psrld $16, %xmm0 1528; SSE2-NEXT: movdqa %xmm1, %xmm2 1529; SSE2-NEXT: pcmpgtb %xmm0, %xmm2 1530; SSE2-NEXT: pand %xmm2, %xmm1 1531; SSE2-NEXT: pandn %xmm0, %xmm2 1532; SSE2-NEXT: por %xmm1, %xmm2 1533; SSE2-NEXT: movdqa %xmm2, %xmm0 1534; SSE2-NEXT: psrlw $8, %xmm0 1535; SSE2-NEXT: movdqa %xmm2, %xmm1 1536; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1537; SSE2-NEXT: pand %xmm1, %xmm2 1538; SSE2-NEXT: pandn %xmm0, %xmm1 1539; SSE2-NEXT: por %xmm2, %xmm1 1540; SSE2-NEXT: movd %xmm1, %eax 1541; SSE2-NEXT: # kill: def $al killed $al killed $eax 1542; SSE2-NEXT: retq 1543; 1544; SSE41-LABEL: test_v64i8: 1545; SSE41: # %bb.0: 1546; SSE41-NEXT: pmaxsb %xmm3, %xmm1 1547; SSE41-NEXT: pmaxsb %xmm2, %xmm0 1548; SSE41-NEXT: pmaxsb %xmm1, %xmm0 1549; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1550; SSE41-NEXT: pxor %xmm1, %xmm0 1551; SSE41-NEXT: movdqa %xmm0, %xmm2 1552; SSE41-NEXT: psrlw $8, %xmm2 1553; SSE41-NEXT: pminub %xmm0, %xmm2 1554; SSE41-NEXT: phminposuw %xmm2, %xmm0 1555; SSE41-NEXT: pxor %xmm1, %xmm0 1556; SSE41-NEXT: pextrb $0, %xmm0, %eax 1557; SSE41-NEXT: # kill: def $al killed $al killed $eax 1558; SSE41-NEXT: retq 1559; 1560; AVX1-LABEL: test_v64i8: 1561; AVX1: # %bb.0: 1562; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1563; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1564; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2 1565; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1566; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0 1567; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1568; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1569; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 
1570; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 1571; AVX1-NEXT: vphminposuw %xmm0, %xmm0 1572; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1573; AVX1-NEXT: vpextrb $0, %xmm0, %eax 1574; AVX1-NEXT: # kill: def $al killed $al killed $eax 1575; AVX1-NEXT: vzeroupper 1576; AVX1-NEXT: retq 1577; 1578; AVX2-LABEL: test_v64i8: 1579; AVX2: # %bb.0: 1580; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 1581; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1582; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1583; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1584; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1585; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 1586; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 1587; AVX2-NEXT: vphminposuw %xmm0, %xmm0 1588; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1589; AVX2-NEXT: vpextrb $0, %xmm0, %eax 1590; AVX2-NEXT: # kill: def $al killed $al killed $eax 1591; AVX2-NEXT: vzeroupper 1592; AVX2-NEXT: retq 1593; 1594; AVX512-LABEL: test_v64i8: 1595; AVX512: # %bb.0: 1596; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1597; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 1598; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1599; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1600; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1601; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1602; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 1603; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 1604; AVX512-NEXT: vphminposuw %xmm0, %xmm0 1605; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1606; AVX512-NEXT: vpextrb $0, %xmm0, %eax 1607; AVX512-NEXT: # kill: def $al killed $al killed $eax 1608; AVX512-NEXT: vzeroupper 1609; AVX512-NEXT: retq 1610 %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8> %a0) 1611 ret i8 %1 1612} 1613 1614define i8 @test_v128i8(<128 x i8> %a0) { 1615; SSE2-LABEL: test_v128i8: 1616; SSE2: # %bb.0: 1617; SSE2-NEXT: movdqa %xmm2, %xmm8 1618; SSE2-NEXT: pcmpgtb %xmm6, %xmm8 1619; SSE2-NEXT: pand %xmm8, %xmm2 1620; 
SSE2-NEXT: pandn %xmm6, %xmm8 1621; SSE2-NEXT: por %xmm2, %xmm8 1622; SSE2-NEXT: movdqa %xmm0, %xmm2 1623; SSE2-NEXT: pcmpgtb %xmm4, %xmm2 1624; SSE2-NEXT: pand %xmm2, %xmm0 1625; SSE2-NEXT: pandn %xmm4, %xmm2 1626; SSE2-NEXT: por %xmm0, %xmm2 1627; SSE2-NEXT: movdqa %xmm3, %xmm0 1628; SSE2-NEXT: pcmpgtb %xmm7, %xmm0 1629; SSE2-NEXT: pand %xmm0, %xmm3 1630; SSE2-NEXT: pandn %xmm7, %xmm0 1631; SSE2-NEXT: por %xmm3, %xmm0 1632; SSE2-NEXT: movdqa %xmm1, %xmm3 1633; SSE2-NEXT: pcmpgtb %xmm5, %xmm3 1634; SSE2-NEXT: pand %xmm3, %xmm1 1635; SSE2-NEXT: pandn %xmm5, %xmm3 1636; SSE2-NEXT: por %xmm1, %xmm3 1637; SSE2-NEXT: movdqa %xmm3, %xmm1 1638; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1639; SSE2-NEXT: pand %xmm1, %xmm3 1640; SSE2-NEXT: pandn %xmm0, %xmm1 1641; SSE2-NEXT: por %xmm3, %xmm1 1642; SSE2-NEXT: movdqa %xmm2, %xmm0 1643; SSE2-NEXT: pcmpgtb %xmm8, %xmm0 1644; SSE2-NEXT: pand %xmm0, %xmm2 1645; SSE2-NEXT: pandn %xmm8, %xmm0 1646; SSE2-NEXT: por %xmm2, %xmm0 1647; SSE2-NEXT: movdqa %xmm0, %xmm2 1648; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1649; SSE2-NEXT: pand %xmm2, %xmm0 1650; SSE2-NEXT: pandn %xmm1, %xmm2 1651; SSE2-NEXT: por %xmm0, %xmm2 1652; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] 1653; SSE2-NEXT: movdqa %xmm2, %xmm1 1654; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1655; SSE2-NEXT: pand %xmm1, %xmm2 1656; SSE2-NEXT: pandn %xmm0, %xmm1 1657; SSE2-NEXT: por %xmm2, %xmm1 1658; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 1659; SSE2-NEXT: movdqa %xmm1, %xmm2 1660; SSE2-NEXT: pcmpgtb %xmm0, %xmm2 1661; SSE2-NEXT: pand %xmm2, %xmm1 1662; SSE2-NEXT: pandn %xmm0, %xmm2 1663; SSE2-NEXT: por %xmm1, %xmm2 1664; SSE2-NEXT: movdqa %xmm2, %xmm0 1665; SSE2-NEXT: psrld $16, %xmm0 1666; SSE2-NEXT: movdqa %xmm2, %xmm1 1667; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 1668; SSE2-NEXT: pand %xmm1, %xmm2 1669; SSE2-NEXT: pandn %xmm0, %xmm1 1670; SSE2-NEXT: por %xmm2, %xmm1 1671; SSE2-NEXT: movdqa %xmm1, %xmm0 1672; SSE2-NEXT: psrlw $8, %xmm0 1673; SSE2-NEXT: movdqa %xmm1, %xmm2 1674; SSE2-NEXT: pcmpgtb 
%xmm0, %xmm2 1675; SSE2-NEXT: pand %xmm2, %xmm1 1676; SSE2-NEXT: pandn %xmm0, %xmm2 1677; SSE2-NEXT: por %xmm1, %xmm2 1678; SSE2-NEXT: movd %xmm2, %eax 1679; SSE2-NEXT: # kill: def $al killed $al killed $eax 1680; SSE2-NEXT: retq 1681; 1682; SSE41-LABEL: test_v128i8: 1683; SSE41: # %bb.0: 1684; SSE41-NEXT: pmaxsb %xmm7, %xmm3 1685; SSE41-NEXT: pmaxsb %xmm5, %xmm1 1686; SSE41-NEXT: pmaxsb %xmm3, %xmm1 1687; SSE41-NEXT: pmaxsb %xmm6, %xmm2 1688; SSE41-NEXT: pmaxsb %xmm4, %xmm0 1689; SSE41-NEXT: pmaxsb %xmm2, %xmm0 1690; SSE41-NEXT: pmaxsb %xmm1, %xmm0 1691; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1692; SSE41-NEXT: pxor %xmm1, %xmm0 1693; SSE41-NEXT: movdqa %xmm0, %xmm2 1694; SSE41-NEXT: psrlw $8, %xmm2 1695; SSE41-NEXT: pminub %xmm0, %xmm2 1696; SSE41-NEXT: phminposuw %xmm2, %xmm0 1697; SSE41-NEXT: pxor %xmm1, %xmm0 1698; SSE41-NEXT: pextrb $0, %xmm0, %eax 1699; SSE41-NEXT: # kill: def $al killed $al killed $eax 1700; SSE41-NEXT: retq 1701; 1702; AVX1-LABEL: test_v128i8: 1703; AVX1: # %bb.0: 1704; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 1705; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 1706; AVX1-NEXT: vpmaxsb %xmm4, %xmm5, %xmm4 1707; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 1708; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 1709; AVX1-NEXT: vpmaxsb %xmm5, %xmm6, %xmm5 1710; AVX1-NEXT: vpmaxsb %xmm4, %xmm5, %xmm4 1711; AVX1-NEXT: vpmaxsb %xmm3, %xmm1, %xmm1 1712; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0 1713; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1714; AVX1-NEXT: vpmaxsb %xmm4, %xmm0, %xmm0 1715; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1716; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1717; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 1718; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 1719; AVX1-NEXT: vphminposuw %xmm0, %xmm0 1720; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1721; AVX1-NEXT: vpextrb $0, %xmm0, %eax 1722; AVX1-NEXT: # kill: def $al killed $al killed $eax 1723; 
AVX1-NEXT: vzeroupper 1724; AVX1-NEXT: retq 1725; 1726; AVX2-LABEL: test_v128i8: 1727; AVX2: # %bb.0: 1728; AVX2-NEXT: vpmaxsb %ymm3, %ymm1, %ymm1 1729; AVX2-NEXT: vpmaxsb %ymm2, %ymm0, %ymm0 1730; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 1731; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1732; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1733; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1734; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1735; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 1736; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 1737; AVX2-NEXT: vphminposuw %xmm0, %xmm0 1738; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1739; AVX2-NEXT: vpextrb $0, %xmm0, %eax 1740; AVX2-NEXT: # kill: def $al killed $al killed $eax 1741; AVX2-NEXT: vzeroupper 1742; AVX2-NEXT: retq 1743; 1744; AVX512-LABEL: test_v128i8: 1745; AVX512: # %bb.0: 1746; AVX512-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0 1747; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1748; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 1749; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1750; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 1751; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1752; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1753; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 1754; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 1755; AVX512-NEXT: vphminposuw %xmm0, %xmm0 1756; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 1757; AVX512-NEXT: vpextrb $0, %xmm0, %eax 1758; AVX512-NEXT: # kill: def $al killed $al killed $eax 1759; AVX512-NEXT: vzeroupper 1760; AVX512-NEXT: retq 1761 %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8> %a0) 1762 ret i8 %1 1763} 1764 1765declare i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>) 1766declare i64 @llvm.experimental.vector.reduce.smax.i64.v4i64(<4 x i64>) 1767declare i64 @llvm.experimental.vector.reduce.smax.i64.v8i64(<8 x i64>) 1768declare i64 @llvm.experimental.vector.reduce.smax.i64.v16i64(<16 x i64>) 
; Declarations of the i32, i16 and i8 variants of the experimental smax (signed maximum) vector-reduction intrinsic - each takes one vector operand and returns a scalar of that vector's element type.
1769 1770declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>) 1771declare i32 @llvm.experimental.vector.reduce.smax.i32.v8i32(<8 x i32>) 1772declare i32 @llvm.experimental.vector.reduce.smax.i32.v16i32(<16 x i32>) 1773declare i32 @llvm.experimental.vector.reduce.smax.i32.v32i32(<32 x i32>) 1774 1775declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>) 1776declare i16 @llvm.experimental.vector.reduce.smax.i16.v16i16(<16 x i16>) 1777declare i16 @llvm.experimental.vector.reduce.smax.i16.v32i16(<32 x i16>) 1778declare i16 @llvm.experimental.vector.reduce.smax.i16.v64i16(<64 x i16>) 1779 1780declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>) 1781declare i8 @llvm.experimental.vector.reduce.smax.i8.v32i8(<32 x i8>) 1782declare i8 @llvm.experimental.vector.reduce.smax.i8.v64i8(<64 x i8>) 1783declare i8 @llvm.experimental.vector.reduce.smax.i8.v128i8(<128 x i8>) 1784