; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Unsigned-max (umax) horizontal vector reductions: each test performs a
; log2-step shuffle/icmp-ugt/select reduction and extracts lane 0.
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X86-SSE42
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE42
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512

;
; 128-bit Vectors
;

define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v2i64:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    pxor %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm4
; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm5, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm2, %xmm3
; X86-SSE2-NEXT:    pand %xmm3, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm3
; X86-SSE2-NEXT:    por %xmm0, %xmm3
; X86-SSE2-NEXT:    movd %xmm3, %eax
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v2i64:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
; X86-SSE42-NEXT:    pxor %xmm3, %xmm0
; X86-SSE42-NEXT:    pxor %xmm2, %xmm3
; X86-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT:    movd %xmm2, %eax
; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v2i64:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X86-AVX1-NEXT:    ## xmm2 = mem[0,0]
; X86-AVX1-NEXT:    vxorps %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT:    vxorps %xmm2, %xmm1, %xmm2
; X86-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v2i64:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X86-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v2i64:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    pxor %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm4
; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm5, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm2, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm3
; X64-SSE2-NEXT:    por %xmm0, %xmm3
; X64-SSE2-NEXT:    movq %xmm3, %rax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v2i64:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; X64-SSE42-NEXT:    pxor %xmm3, %xmm0
; X64-SSE42-NEXT:    pxor %xmm2, %xmm3
; X64-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT:    movq %xmm2, %rax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v2i64:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; X64-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT:    vmovq %xmm0, %rax
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v2i64:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X64-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; X64-AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT:    vmovq %xmm0, %rax
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v2i64:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovq %xmm0, %rax
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %2 = icmp ugt <2 x i64> %a0, %1
  %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1
  %4 = extractelement <2 x i64> %3, i32 0
  ret i64 %4
}

define i32 @test_reduce_v4i32(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i32:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
; X86-SSE2-NEXT:    pcmpgtd %xmm4, %xmm3
; X86-SSE2-NEXT:    pand %xmm3, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm3
; X86-SSE2-NEXT:    por %xmm0, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm3
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm3, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v4i32:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE42-NEXT:    pmaxud %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE42-NEXT:    pmaxud %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v4i32:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v4i32:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm4
; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
; X64-SSE2-NEXT:    pcmpgtd %xmm4, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm3
; X64-SSE2-NEXT:    por %xmm0, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm3, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm3
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm3, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v4i32:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE42-NEXT:    pmaxud %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE42-NEXT:    pmaxud %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v4i32:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    retq
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp ugt <4 x i32> %a0, %1
  %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp ugt <4 x i32> %3, %4
  %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4
  %7 = extractelement <4 x i32> %6, i32 0
  ret i32 %7
}

define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X86-SSE2-NEXT:    paddw %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm0
; X86-SSE2-NEXT:    paddw %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X86-SSE2-NEXT:    paddw %xmm0, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    notl %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    notl %eax
; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v8i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X64-SSE2-NEXT:    paddw %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm0
; X64-SSE2-NEXT:    paddw %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X64-SSE2-NEXT:    paddw %xmm0, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    notl %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v8i16:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    notl %eax
; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v8i16:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    notl %eax
; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v8i16:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    notl %eax
; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ugt <8 x i16> %a0, %1
  %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ugt <8 x i16> %3, %4
  %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4
  %7 = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ugt <8 x i16> %6, %7
  %9 = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7
  %10 = extractelement <8 x i16> %9, i32 0
  ret i16 %10
}

define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    psrlw $8, %xmm0
; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    notb %al
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    notb %al
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    psrlw $8, %xmm0
; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    notb %al
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v16i8:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    notb %al
; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v16i8:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    notb %al
; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v16i8:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    notb %al
; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ugt <16 x i8> %a0, %1
  %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ugt <16 x i8> %3, %4
  %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4
  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ugt <16 x i8> %6, %7
  %9 = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7
  %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ugt <16 x i8> %9, %10
  %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10
  %13 = extractelement <16 x i8> %12, i32 0
  ret i8 %13
}

;
; 256-bit Vectors
;

define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i64:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm4
; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm5
; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm6, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm3, %xmm4
; X86-SSE2-NEXT:    pand %xmm4, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm4
; X86-SSE2-NEXT:    por %xmm0, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X86-SSE2-NEXT:    pand %xmm5, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm4
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm4, %xmm2
; X86-SSE2-NEXT:    movd %xmm2, %eax
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT:    movd %xmm0, %edx
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v4i64:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm4
; X86-SSE42-NEXT:    pxor %xmm3, %xmm4
; X86-SSE42-NEXT:    pxor %xmm3, %xmm0
; X86-SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    pxor %xmm3, %xmm0
; X86-SSE42-NEXT:    pxor %xmm2, %xmm3
; X86-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; X86-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT:    movd %xmm2, %eax
; X86-SSE42-NEXT:    pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v4i64:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X86-AVX1-NEXT:    ## xmm2 = mem[0,0]
; X86-AVX1-NEXT:    vxorps %xmm2, %xmm1, %xmm3
; X86-AVX1-NEXT:    vxorps %xmm2, %xmm0, %xmm4
; X86-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT:    vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X86-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; X86-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm4
; X86-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT:    vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v4i64:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm4
; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm5
; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm5
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm3, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm6, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm3, %xmm4
; X64-SSE2-NEXT:    pand %xmm4, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm4
; X64-SSE2-NEXT:    por %xmm0, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X64-SSE2-NEXT:    pcmpeqd %xmm1, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE2-NEXT:    pand %xmm5, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm4
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm4, %xmm2
; X64-SSE2-NEXT:    movq %xmm2, %rax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v4i64:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE42-NEXT:    movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm4
; X64-SSE42-NEXT:    pxor %xmm3, %xmm4
; X64-SSE42-NEXT:    pxor %xmm3, %xmm0
; X64-SSE42-NEXT:    pcmpgtq %xmm4, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    pxor %xmm3, %xmm0
; X64-SSE42-NEXT:    pxor %xmm2, %xmm3
; X64-SSE42-NEXT:    pcmpgtq %xmm3, %xmm0
; X64-SSE42-NEXT:    blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT:    movq %xmm2, %rax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v4i64:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; X64-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm4
; X64-AVX1-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; X64-AVX1-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX1-NEXT:    vxorpd %xmm2, %xmm1, %xmm2
; X64-AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT:    vmovq %xmm0, %rax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v4i64:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; X64-AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm3
; X64-AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm4
; X64-AVX2-NEXT:    vpcmpgtq %xmm3, %xmm4, %xmm3
; X64-AVX2-NEXT:    vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT:    vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT:    vxorpd %xmm2, %xmm1, %xmm2
; X64-AVX2-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; X64-AVX2-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT:    vmovq %xmm0, %rax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v4i64:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovq %xmm0, %rax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp ugt <4 x i64> %a0, %1
  %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1
  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp ugt <4 x i64> %3, %4
  %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4
  %7 = extractelement <4 x i64> %6, i32 0
  ret i64 %7
}

define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i32:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm4
; X86-SSE2-NEXT:    pxor %xmm2, %xmm4
; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X86-SSE2-NEXT:    pand %xmm4, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm4
; X86-SSE2-NEXT:    por %xmm0, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    pcmpgtd %xmm3, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm4
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm4, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm3
; X86-SSE2-NEXT:    pxor %xmm2, %xmm3
; X86-SSE2-NEXT:    pxor %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; X86-SSE2-NEXT:    pand %xmm3, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm3
; X86-SSE2-NEXT:    por %xmm1, %xmm3
; X86-SSE2-NEXT:    movd %xmm3, %eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v8i32:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pmaxud %xmm1, %xmm0
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE42-NEXT:    pmaxud %xmm0, %xmm1
; X86-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE42-NEXT:    pmaxud %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v8i32:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v8i32:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v8i32:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm4
; X64-SSE2-NEXT:    pxor %xmm2, %xmm4
; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT:    pand %xmm4, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm4
; X64-SSE2-NEXT:    por %xmm0, %xmm4
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm4, %xmm1
; X64-SSE2-NEXT:    pxor %xmm2, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    pcmpgtd %xmm3, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm4
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm4, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm3
; X64-SSE2-NEXT:    pxor %xmm2, %xmm3
; X64-SSE2-NEXT:    pxor %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
; X64-SSE2-NEXT:    pand %xmm3, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm3
; X64-SSE2-NEXT:    por %xmm1, %xmm3
; X64-SSE2-NEXT:    movd %xmm3, %eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v8i32:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pmaxud %xmm1, %xmm0
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE42-NEXT:    pmaxud %xmm0, %xmm1
; X64-SSE42-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE42-NEXT:    pmaxud %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v8i32:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v8i32:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v8i32:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ugt <8 x i32> %a0, %1
  %3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1
  %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ugt <8 x i32> %3, %4
  %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4
  %7 = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ugt <8 x i32> %6, %7
  %9 = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7
  %10 = extractelement <8 x i32> %9, i32 0
  ret i32 %10
}

define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i16:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X86-SSE2-NEXT:    paddw %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm0
; X86-SSE2-NEXT:    paddw %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X86-SSE2-NEXT:    paddw %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    psubusw %xmm1, %xmm0
; X86-SSE2-NEXT:    paddw %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v16i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pmaxuw %xmm1, %xmm0
; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    notl %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX1-LABEL: test_reduce_v16i16:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT:    vmovd %xmm0, %eax
; X86-AVX1-NEXT:    notl %eax
; X86-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT:    vzeroupper
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: test_reduce_v16i16:
; X86-AVX2:       ## %bb.0:
; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT:    vmovd %xmm0, %eax
; X86-AVX2-NEXT:    notl %eax
; X86-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT:    vzeroupper
; X86-AVX2-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v16i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X64-SSE2-NEXT:    paddw %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm0
; X64-SSE2-NEXT:    paddw %xmm1, %xmm0
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X64-SSE2-NEXT:    paddw %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm0
; X64-SSE2-NEXT:    paddw %xmm1, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v16i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pmaxuw %xmm1, %xmm0
; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
902; X64-SSE42-NEXT: pxor %xmm0, %xmm1 903; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 904; X64-SSE42-NEXT: movd %xmm0, %eax 905; X64-SSE42-NEXT: notl %eax 906; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 907; X64-SSE42-NEXT: retq 908; 909; X64-AVX1-LABEL: test_reduce_v16i16: 910; X64-AVX1: ## %bb.0: 911; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 912; X64-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 913; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 914; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 915; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 916; X64-AVX1-NEXT: vmovd %xmm0, %eax 917; X64-AVX1-NEXT: notl %eax 918; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 919; X64-AVX1-NEXT: vzeroupper 920; X64-AVX1-NEXT: retq 921; 922; X64-AVX2-LABEL: test_reduce_v16i16: 923; X64-AVX2: ## %bb.0: 924; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 925; X64-AVX2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 926; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 927; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 928; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 929; X64-AVX2-NEXT: vmovd %xmm0, %eax 930; X64-AVX2-NEXT: notl %eax 931; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 932; X64-AVX2-NEXT: vzeroupper 933; X64-AVX2-NEXT: retq 934; 935; X64-AVX512-LABEL: test_reduce_v16i16: 936; X64-AVX512: ## %bb.0: 937; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 938; X64-AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 939; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 940; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 941; X64-AVX512-NEXT: vmovd %xmm0, %eax 942; X64-AVX512-NEXT: notl %eax 943; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 944; X64-AVX512-NEXT: vzeroupper 945; X64-AVX512-NEXT: retq 946 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 947 %2 = icmp ugt <16 x i16> %a0, %1 948 %3 = select <16 x i1> %2, <16 x i16> 
%a0, <16 x i16> %1 949 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 950 %5 = icmp ugt <16 x i16> %3, %4 951 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 952 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 953 %8 = icmp ugt <16 x i16> %6, %7 954 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 955 %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 956 %11 = icmp ugt <16 x i16> %9, %10 957 %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10 958 %13 = extractelement <16 x i16> %12, i32 0 959 ret i16 %13 960} 961 962define i8 @test_reduce_v32i8(<32 x i8> %a0) { 963; X86-SSE2-LABEL: test_reduce_v32i8: 964; X86-SSE2: ## %bb.0: 965; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0 966; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 967; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1 968; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 969; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0 970; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 971; X86-SSE2-NEXT: psrld $16, %xmm1 972; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1 973; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 974; X86-SSE2-NEXT: psrlw $8, %xmm0 975; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0 976; X86-SSE2-NEXT: movd %xmm0, %eax 977; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 978; X86-SSE2-NEXT: retl 979; 980; X86-SSE42-LABEL: test_reduce_v32i8: 981; X86-SSE42: ## %bb.0: 982; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0 983; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 984; X86-SSE42-NEXT: pxor %xmm0, %xmm1 985; 
X86-SSE42-NEXT: movdqa %xmm1, %xmm0 986; X86-SSE42-NEXT: psrlw $8, %xmm0 987; X86-SSE42-NEXT: pminub %xmm1, %xmm0 988; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 989; X86-SSE42-NEXT: movd %xmm0, %eax 990; X86-SSE42-NEXT: notb %al 991; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 992; X86-SSE42-NEXT: retl 993; 994; X86-AVX1-LABEL: test_reduce_v32i8: 995; X86-AVX1: ## %bb.0: 996; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 997; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 998; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 999; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1000; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1001; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1002; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1003; X86-AVX1-NEXT: vmovd %xmm0, %eax 1004; X86-AVX1-NEXT: notb %al 1005; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1006; X86-AVX1-NEXT: vzeroupper 1007; X86-AVX1-NEXT: retl 1008; 1009; X86-AVX2-LABEL: test_reduce_v32i8: 1010; X86-AVX2: ## %bb.0: 1011; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1012; X86-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1013; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1014; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1015; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 1016; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1017; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1018; X86-AVX2-NEXT: vmovd %xmm0, %eax 1019; X86-AVX2-NEXT: notb %al 1020; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1021; X86-AVX2-NEXT: vzeroupper 1022; X86-AVX2-NEXT: retl 1023; 1024; X64-SSE2-LABEL: test_reduce_v32i8: 1025; X64-SSE2: ## %bb.0: 1026; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0 1027; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1028; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1 1029; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1030; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0 1031; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1032; X64-SSE2-NEXT: psrld $16, %xmm1 1033; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1 1034; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1035; X64-SSE2-NEXT: 
psrlw $8, %xmm0 1036; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0 1037; X64-SSE2-NEXT: movd %xmm0, %eax 1038; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1039; X64-SSE2-NEXT: retq 1040; 1041; X64-SSE42-LABEL: test_reduce_v32i8: 1042; X64-SSE42: ## %bb.0: 1043; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0 1044; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 1045; X64-SSE42-NEXT: pxor %xmm0, %xmm1 1046; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 1047; X64-SSE42-NEXT: psrlw $8, %xmm0 1048; X64-SSE42-NEXT: pminub %xmm1, %xmm0 1049; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1050; X64-SSE42-NEXT: movd %xmm0, %eax 1051; X64-SSE42-NEXT: notb %al 1052; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1053; X64-SSE42-NEXT: retq 1054; 1055; X64-AVX1-LABEL: test_reduce_v32i8: 1056; X64-AVX1: ## %bb.0: 1057; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 1058; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1059; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1060; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1061; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1062; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1063; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1064; X64-AVX1-NEXT: vmovd %xmm0, %eax 1065; X64-AVX1-NEXT: notb %al 1066; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1067; X64-AVX1-NEXT: vzeroupper 1068; X64-AVX1-NEXT: retq 1069; 1070; X64-AVX2-LABEL: test_reduce_v32i8: 1071; X64-AVX2: ## %bb.0: 1072; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1073; X64-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1074; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1075; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1076; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 1077; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1078; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1079; X64-AVX2-NEXT: vmovd %xmm0, %eax 1080; X64-AVX2-NEXT: notb %al 1081; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1082; X64-AVX2-NEXT: vzeroupper 1083; X64-AVX2-NEXT: retq 1084; 1085; X64-AVX512-LABEL: test_reduce_v32i8: 1086; X64-AVX512: ## %bb.0: 1087; 
X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1088; X64-AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1089; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 1090; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 1091; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 1092; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1093; X64-AVX512-NEXT: vmovd %xmm0, %eax 1094; X64-AVX512-NEXT: notb %al 1095; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 1096; X64-AVX512-NEXT: vzeroupper 1097; X64-AVX512-NEXT: retq 1098 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1099 %2 = icmp ugt <32 x i8> %a0, %1 1100 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1 1101 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1102 %5 = icmp ugt <32 x i8> %3, %4 1103 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 1104 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1105 %8 = icmp ugt <32 x i8> %6, %7 1106 %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7 1107 %10 = shufflevector <32 x i8> %9, <32 x 
i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1108 %11 = icmp ugt <32 x i8> %9, %10 1109 %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10 1110 %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1111 %14 = icmp ugt <32 x i8> %12, %13 1112 %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13 1113 %16 = extractelement <32 x i8> %15, i32 0 1114 ret i8 %16 1115} 1116 1117; 1118; 512-bit Vectors 1119; 1120 1121define i64 @test_reduce_v8i64(<8 x i64> %a0) { 1122; X86-SSE2-LABEL: test_reduce_v8i64: 1123; X86-SSE2: ## %bb.0: 1124; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] 1125; X86-SSE2-NEXT: movdqa %xmm2, %xmm5 1126; X86-SSE2-NEXT: pxor %xmm4, %xmm5 1127; X86-SSE2-NEXT: movdqa %xmm0, %xmm6 1128; X86-SSE2-NEXT: pxor %xmm4, %xmm6 1129; X86-SSE2-NEXT: movdqa %xmm6, %xmm7 1130; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1131; X86-SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1132; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2] 1133; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1134; X86-SSE2-NEXT: pand %xmm5, %xmm6 1135; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1136; X86-SSE2-NEXT: por %xmm6, %xmm5 1137; X86-SSE2-NEXT: pand %xmm5, %xmm0 1138; X86-SSE2-NEXT: pandn %xmm2, %xmm5 1139; X86-SSE2-NEXT: por %xmm0, %xmm5 1140; X86-SSE2-NEXT: movdqa %xmm3, %xmm0 
1141; X86-SSE2-NEXT: pxor %xmm4, %xmm0 1142; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1143; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1144; X86-SSE2-NEXT: movdqa %xmm2, %xmm6 1145; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm6 1146; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm2 1147; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 1148; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1149; X86-SSE2-NEXT: pand %xmm0, %xmm2 1150; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3] 1151; X86-SSE2-NEXT: por %xmm2, %xmm0 1152; X86-SSE2-NEXT: pand %xmm0, %xmm1 1153; X86-SSE2-NEXT: pandn %xmm3, %xmm0 1154; X86-SSE2-NEXT: por %xmm1, %xmm0 1155; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1156; X86-SSE2-NEXT: pxor %xmm4, %xmm1 1157; X86-SSE2-NEXT: movdqa %xmm5, %xmm2 1158; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1159; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 1160; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3 1161; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm2 1162; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,0,2,2] 1163; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1164; X86-SSE2-NEXT: pand %xmm1, %xmm2 1165; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 1166; X86-SSE2-NEXT: por %xmm2, %xmm1 1167; X86-SSE2-NEXT: pand %xmm1, %xmm5 1168; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1169; X86-SSE2-NEXT: por %xmm5, %xmm1 1170; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1171; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1172; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1173; X86-SSE2-NEXT: pxor %xmm0, %xmm4 1174; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 1175; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm3 1176; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1177; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 1178; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 1179; X86-SSE2-NEXT: pand %xmm2, %xmm4 1180; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 1181; X86-SSE2-NEXT: por %xmm4, %xmm2 1182; X86-SSE2-NEXT: pand %xmm2, %xmm1 1183; X86-SSE2-NEXT: pandn %xmm0, %xmm2 1184; X86-SSE2-NEXT: por %xmm1, %xmm2 1185; X86-SSE2-NEXT: movd %xmm2, %eax 1186; X86-SSE2-NEXT: 
pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1187; X86-SSE2-NEXT: movd %xmm0, %edx 1188; X86-SSE2-NEXT: retl 1189; 1190; X86-SSE42-LABEL: test_reduce_v8i64: 1191; X86-SSE42: ## %bb.0: 1192; X86-SSE42-NEXT: movdqa %xmm0, %xmm4 1193; X86-SSE42-NEXT: movdqa {{.*#+}} xmm5 = [0,2147483648,0,2147483648] 1194; X86-SSE42-NEXT: movdqa %xmm2, %xmm6 1195; X86-SSE42-NEXT: pxor %xmm5, %xmm6 1196; X86-SSE42-NEXT: pxor %xmm5, %xmm0 1197; X86-SSE42-NEXT: pcmpgtq %xmm6, %xmm0 1198; X86-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1199; X86-SSE42-NEXT: movdqa %xmm3, %xmm4 1200; X86-SSE42-NEXT: pxor %xmm5, %xmm4 1201; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 1202; X86-SSE42-NEXT: pxor %xmm5, %xmm0 1203; X86-SSE42-NEXT: pcmpgtq %xmm4, %xmm0 1204; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1205; X86-SSE42-NEXT: movapd %xmm3, %xmm1 1206; X86-SSE42-NEXT: xorpd %xmm5, %xmm1 1207; X86-SSE42-NEXT: movapd %xmm2, %xmm0 1208; X86-SSE42-NEXT: xorpd %xmm5, %xmm0 1209; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1210; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1211; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3] 1212; X86-SSE42-NEXT: movdqa %xmm3, %xmm0 1213; X86-SSE42-NEXT: pxor %xmm5, %xmm0 1214; X86-SSE42-NEXT: pxor %xmm1, %xmm5 1215; X86-SSE42-NEXT: pcmpgtq %xmm5, %xmm0 1216; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 1217; X86-SSE42-NEXT: movd %xmm1, %eax 1218; X86-SSE42-NEXT: pextrd $1, %xmm1, %edx 1219; X86-SSE42-NEXT: retl 1220; 1221; X86-AVX1-LABEL: test_reduce_v8i64: 1222; X86-AVX1: ## %bb.0: 1223; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] 1224; X86-AVX1-NEXT: ## xmm2 = mem[0,0] 1225; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm3 1226; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm4 1227; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 1228; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 1229; X86-AVX1-NEXT: vxorps %xmm2, %xmm4, %xmm5 1230; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 1231; X86-AVX1-NEXT: vxorps %xmm2, %xmm6, %xmm7 1232; X86-AVX1-NEXT: vpcmpgtq %xmm5, %xmm7, 
%xmm5 1233; X86-AVX1-NEXT: vblendvpd %xmm5, %xmm6, %xmm4, %xmm4 1234; X86-AVX1-NEXT: vxorpd %xmm2, %xmm4, %xmm5 1235; X86-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 1236; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm1 1237; X86-AVX1-NEXT: vpcmpgtq %xmm5, %xmm1, %xmm1 1238; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm4, %xmm0 1239; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1240; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3 1241; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2 1242; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1243; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1244; X86-AVX1-NEXT: vmovd %xmm0, %eax 1245; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx 1246; X86-AVX1-NEXT: vzeroupper 1247; X86-AVX1-NEXT: retl 1248; 1249; X86-AVX2-LABEL: test_reduce_v8i64: 1250; X86-AVX2: ## %bb.0: 1251; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] 1252; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3 1253; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm4 1254; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3 1255; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0 1256; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1257; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm3 1258; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm4 1259; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 1260; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 1261; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1262; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3 1263; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2 1264; X86-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1265; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1266; X86-AVX2-NEXT: vmovd %xmm0, %eax 1267; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx 1268; X86-AVX2-NEXT: vzeroupper 1269; X86-AVX2-NEXT: retl 1270; 1271; X64-SSE2-LABEL: test_reduce_v8i64: 1272; X64-SSE2: ## %bb.0: 1273; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456] 1274; X64-SSE2-NEXT: movdqa %xmm2, 
%xmm5 1275; X64-SSE2-NEXT: pxor %xmm4, %xmm5 1276; X64-SSE2-NEXT: movdqa %xmm0, %xmm6 1277; X64-SSE2-NEXT: pxor %xmm4, %xmm6 1278; X64-SSE2-NEXT: movdqa %xmm6, %xmm7 1279; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1280; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] 1281; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1282; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1283; X64-SSE2-NEXT: pand %xmm8, %xmm6 1284; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1285; X64-SSE2-NEXT: por %xmm6, %xmm5 1286; X64-SSE2-NEXT: pand %xmm5, %xmm0 1287; X64-SSE2-NEXT: pandn %xmm2, %xmm5 1288; X64-SSE2-NEXT: por %xmm0, %xmm5 1289; X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1290; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1291; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1292; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1293; X64-SSE2-NEXT: movdqa %xmm2, %xmm6 1294; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm6 1295; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1296; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm2 1297; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] 1298; X64-SSE2-NEXT: pand %xmm7, %xmm0 1299; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3] 1300; X64-SSE2-NEXT: por %xmm0, %xmm2 1301; X64-SSE2-NEXT: pand %xmm2, %xmm1 1302; X64-SSE2-NEXT: pandn %xmm3, %xmm2 1303; X64-SSE2-NEXT: por %xmm1, %xmm2 1304; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 1305; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1306; X64-SSE2-NEXT: movdqa %xmm5, %xmm1 1307; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1308; X64-SSE2-NEXT: movdqa %xmm1, %xmm3 1309; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1310; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[0,0,2,2] 1311; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1312; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 1313; X64-SSE2-NEXT: pand %xmm6, %xmm0 1314; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 1315; X64-SSE2-NEXT: por %xmm0, %xmm1 1316; X64-SSE2-NEXT: pand %xmm1, %xmm5 1317; X64-SSE2-NEXT: pandn %xmm2, %xmm1 1318; X64-SSE2-NEXT: por %xmm5, %xmm1 1319; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1320; 
X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1321; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1322; X64-SSE2-NEXT: pxor %xmm0, %xmm4 1323; X64-SSE2-NEXT: movdqa %xmm2, %xmm3 1324; X64-SSE2-NEXT: pcmpgtd %xmm4, %xmm3 1325; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 1326; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1327; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 1328; X64-SSE2-NEXT: pand %xmm5, %xmm2 1329; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1330; X64-SSE2-NEXT: por %xmm2, %xmm3 1331; X64-SSE2-NEXT: pand %xmm3, %xmm1 1332; X64-SSE2-NEXT: pandn %xmm0, %xmm3 1333; X64-SSE2-NEXT: por %xmm1, %xmm3 1334; X64-SSE2-NEXT: movq %xmm3, %rax 1335; X64-SSE2-NEXT: retq 1336; 1337; X64-SSE42-LABEL: test_reduce_v8i64: 1338; X64-SSE42: ## %bb.0: 1339; X64-SSE42-NEXT: movdqa %xmm0, %xmm4 1340; X64-SSE42-NEXT: movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808] 1341; X64-SSE42-NEXT: movdqa %xmm2, %xmm6 1342; X64-SSE42-NEXT: pxor %xmm5, %xmm6 1343; X64-SSE42-NEXT: pxor %xmm5, %xmm0 1344; X64-SSE42-NEXT: pcmpgtq %xmm6, %xmm0 1345; X64-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1346; X64-SSE42-NEXT: movdqa %xmm3, %xmm4 1347; X64-SSE42-NEXT: pxor %xmm5, %xmm4 1348; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 1349; X64-SSE42-NEXT: pxor %xmm5, %xmm0 1350; X64-SSE42-NEXT: pcmpgtq %xmm4, %xmm0 1351; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1352; X64-SSE42-NEXT: movapd %xmm3, %xmm1 1353; X64-SSE42-NEXT: xorpd %xmm5, %xmm1 1354; X64-SSE42-NEXT: movapd %xmm2, %xmm0 1355; X64-SSE42-NEXT: xorpd %xmm5, %xmm0 1356; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1357; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1358; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3] 1359; X64-SSE42-NEXT: movdqa %xmm3, %xmm0 1360; X64-SSE42-NEXT: pxor %xmm5, %xmm0 1361; X64-SSE42-NEXT: pxor %xmm1, %xmm5 1362; X64-SSE42-NEXT: pcmpgtq %xmm5, %xmm0 1363; X64-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 1364; X64-SSE42-NEXT: movq %xmm1, %rax 1365; X64-SSE42-NEXT: retq 1366; 1367; X64-AVX1-LABEL: test_reduce_v8i64: 
1368; X64-AVX1: ## %bb.0: 1369; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] 1370; X64-AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3 1371; X64-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm4 1372; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 1373; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 1374; X64-AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm5 1375; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 1376; X64-AVX1-NEXT: vpxor %xmm2, %xmm6, %xmm7 1377; X64-AVX1-NEXT: vpcmpgtq %xmm5, %xmm7, %xmm5 1378; X64-AVX1-NEXT: vblendvpd %xmm5, %xmm6, %xmm4, %xmm4 1379; X64-AVX1-NEXT: vxorpd %xmm2, %xmm4, %xmm5 1380; X64-AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 1381; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm1 1382; X64-AVX1-NEXT: vpcmpgtq %xmm5, %xmm1, %xmm1 1383; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm4, %xmm0 1384; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1385; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3 1386; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2 1387; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1388; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1389; X64-AVX1-NEXT: vmovq %xmm0, %rax 1390; X64-AVX1-NEXT: vzeroupper 1391; X64-AVX1-NEXT: retq 1392; 1393; X64-AVX2-LABEL: test_reduce_v8i64: 1394; X64-AVX2: ## %bb.0: 1395; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] 1396; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3 1397; X64-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm4 1398; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3 1399; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0 1400; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1401; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm3 1402; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm4 1403; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3 1404; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0 1405; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1406; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3 1407; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, 
%xmm2 1408; X64-AVX2-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1409; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1410; X64-AVX2-NEXT: vmovq %xmm0, %rax 1411; X64-AVX2-NEXT: vzeroupper 1412; X64-AVX2-NEXT: retq 1413; 1414; X64-AVX512-LABEL: test_reduce_v8i64: 1415; X64-AVX512: ## %bb.0: 1416; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1417; X64-AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0 1418; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1419; X64-AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0 1420; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1421; X64-AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0 1422; X64-AVX512-NEXT: vmovq %xmm0, %rax 1423; X64-AVX512-NEXT: vzeroupper 1424; X64-AVX512-NEXT: retq 1425 %1 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1426 %2 = icmp ugt <8 x i64> %a0, %1 1427 %3 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1 1428 %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1429 %5 = icmp ugt <8 x i64> %3, %4 1430 %6 = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4 1431 %7 = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1432 %8 = icmp ugt <8 x i64> %6, %7 1433 %9 = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7 1434 %10 = extractelement <8 x i64> %9, i32 0 1435 ret i64 %10 1436} 1437 1438define i32 @test_reduce_v16i32(<16 x i32> %a0) { 1439; X86-SSE2-LABEL: test_reduce_v16i32: 1440; X86-SSE2: ## %bb.0: 1441; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] 1442; X86-SSE2-NEXT: movdqa %xmm2, %xmm6 1443; X86-SSE2-NEXT: pxor %xmm4, %xmm6 1444; X86-SSE2-NEXT: movdqa %xmm0, %xmm5 1445; X86-SSE2-NEXT: pxor %xmm4, %xmm5 1446; X86-SSE2-NEXT: pcmpgtd %xmm6, %xmm5 1447; X86-SSE2-NEXT: pand %xmm5, %xmm0 1448; X86-SSE2-NEXT: pandn %xmm2, %xmm5 1449; 
X86-SSE2-NEXT: por %xmm0, %xmm5 1450; X86-SSE2-NEXT: movdqa %xmm3, %xmm0 1451; X86-SSE2-NEXT: pxor %xmm4, %xmm0 1452; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1453; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1454; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 1455; X86-SSE2-NEXT: pand %xmm2, %xmm1 1456; X86-SSE2-NEXT: pandn %xmm3, %xmm2 1457; X86-SSE2-NEXT: por %xmm1, %xmm2 1458; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 1459; X86-SSE2-NEXT: pxor %xmm4, %xmm0 1460; X86-SSE2-NEXT: movdqa %xmm5, %xmm1 1461; X86-SSE2-NEXT: pxor %xmm4, %xmm1 1462; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1463; X86-SSE2-NEXT: pand %xmm1, %xmm5 1464; X86-SSE2-NEXT: pandn %xmm2, %xmm1 1465; X86-SSE2-NEXT: por %xmm5, %xmm1 1466; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1467; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1468; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1469; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 1470; X86-SSE2-NEXT: pxor %xmm4, %xmm3 1471; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm2 1472; X86-SSE2-NEXT: pand %xmm2, %xmm1 1473; X86-SSE2-NEXT: pandn %xmm0, %xmm2 1474; X86-SSE2-NEXT: por %xmm1, %xmm2 1475; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1476; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 1477; X86-SSE2-NEXT: pxor %xmm4, %xmm1 1478; X86-SSE2-NEXT: pxor %xmm0, %xmm4 1479; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm1 1480; X86-SSE2-NEXT: pand %xmm1, %xmm2 1481; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1482; X86-SSE2-NEXT: por %xmm2, %xmm1 1483; X86-SSE2-NEXT: movd %xmm1, %eax 1484; X86-SSE2-NEXT: retl 1485; 1486; X86-SSE42-LABEL: test_reduce_v16i32: 1487; X86-SSE42: ## %bb.0: 1488; X86-SSE42-NEXT: pmaxud %xmm3, %xmm1 1489; X86-SSE42-NEXT: pmaxud %xmm2, %xmm1 1490; X86-SSE42-NEXT: pmaxud %xmm0, %xmm1 1491; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1492; X86-SSE42-NEXT: pmaxud %xmm1, %xmm0 1493; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1494; X86-SSE42-NEXT: pmaxud %xmm0, %xmm1 1495; X86-SSE42-NEXT: movd %xmm1, %eax 1496; X86-SSE42-NEXT: retl 1497; 1498; X86-AVX1-LABEL: test_reduce_v16i32: 1499; X86-AVX1: ## %bb.0: 1500; 
X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1501; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1502; X86-AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2 1503; X86-AVX1-NEXT: vpmaxud %xmm2, %xmm1, %xmm1 1504; X86-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1505; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1506; X86-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1507; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1508; X86-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1509; X86-AVX1-NEXT: vmovd %xmm0, %eax 1510; X86-AVX1-NEXT: vzeroupper 1511; X86-AVX1-NEXT: retl 1512; 1513; X86-AVX2-LABEL: test_reduce_v16i32: 1514; X86-AVX2: ## %bb.0: 1515; X86-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 1516; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1517; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1518; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1519; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1520; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1521; X86-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1522; X86-AVX2-NEXT: vmovd %xmm0, %eax 1523; X86-AVX2-NEXT: vzeroupper 1524; X86-AVX2-NEXT: retl 1525; 1526; X64-SSE2-LABEL: test_reduce_v16i32: 1527; X64-SSE2: ## %bb.0: 1528; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] 1529; X64-SSE2-NEXT: movdqa %xmm2, %xmm6 1530; X64-SSE2-NEXT: pxor %xmm4, %xmm6 1531; X64-SSE2-NEXT: movdqa %xmm0, %xmm5 1532; X64-SSE2-NEXT: pxor %xmm4, %xmm5 1533; X64-SSE2-NEXT: pcmpgtd %xmm6, %xmm5 1534; X64-SSE2-NEXT: pand %xmm5, %xmm0 1535; X64-SSE2-NEXT: pandn %xmm2, %xmm5 1536; X64-SSE2-NEXT: por %xmm0, %xmm5 1537; X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1538; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1539; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1540; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1541; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 1542; X64-SSE2-NEXT: pand %xmm2, %xmm1 1543; X64-SSE2-NEXT: pandn %xmm3, %xmm2 1544; X64-SSE2-NEXT: por %xmm1, %xmm2 1545; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 1546; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1547; X64-SSE2-NEXT: 
movdqa %xmm5, %xmm1 1548; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1549; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1550; X64-SSE2-NEXT: pand %xmm1, %xmm5 1551; X64-SSE2-NEXT: pandn %xmm2, %xmm1 1552; X64-SSE2-NEXT: por %xmm5, %xmm1 1553; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1554; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1555; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1556; X64-SSE2-NEXT: movdqa %xmm0, %xmm3 1557; X64-SSE2-NEXT: pxor %xmm4, %xmm3 1558; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm2 1559; X64-SSE2-NEXT: pand %xmm2, %xmm1 1560; X64-SSE2-NEXT: pandn %xmm0, %xmm2 1561; X64-SSE2-NEXT: por %xmm1, %xmm2 1562; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1563; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 1564; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1565; X64-SSE2-NEXT: pxor %xmm0, %xmm4 1566; X64-SSE2-NEXT: pcmpgtd %xmm4, %xmm1 1567; X64-SSE2-NEXT: pand %xmm1, %xmm2 1568; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1569; X64-SSE2-NEXT: por %xmm2, %xmm1 1570; X64-SSE2-NEXT: movd %xmm1, %eax 1571; X64-SSE2-NEXT: retq 1572; 1573; X64-SSE42-LABEL: test_reduce_v16i32: 1574; X64-SSE42: ## %bb.0: 1575; X64-SSE42-NEXT: pmaxud %xmm3, %xmm1 1576; X64-SSE42-NEXT: pmaxud %xmm2, %xmm1 1577; X64-SSE42-NEXT: pmaxud %xmm0, %xmm1 1578; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1579; X64-SSE42-NEXT: pmaxud %xmm1, %xmm0 1580; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1581; X64-SSE42-NEXT: pmaxud %xmm0, %xmm1 1582; X64-SSE42-NEXT: movd %xmm1, %eax 1583; X64-SSE42-NEXT: retq 1584; 1585; X64-AVX1-LABEL: test_reduce_v16i32: 1586; X64-AVX1: ## %bb.0: 1587; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1588; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1589; X64-AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2 1590; X64-AVX1-NEXT: vpmaxud %xmm2, %xmm1, %xmm1 1591; X64-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1592; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1593; X64-AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1594; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1595; X64-AVX1-NEXT: vpmaxud %xmm1, %xmm0, 
%xmm0 1596; X64-AVX1-NEXT: vmovd %xmm0, %eax 1597; X64-AVX1-NEXT: vzeroupper 1598; X64-AVX1-NEXT: retq 1599; 1600; X64-AVX2-LABEL: test_reduce_v16i32: 1601; X64-AVX2: ## %bb.0: 1602; X64-AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 1603; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1604; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1605; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1606; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1607; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1608; X64-AVX2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1609; X64-AVX2-NEXT: vmovd %xmm0, %eax 1610; X64-AVX2-NEXT: vzeroupper 1611; X64-AVX2-NEXT: retq 1612; 1613; X64-AVX512-LABEL: test_reduce_v16i32: 1614; X64-AVX512: ## %bb.0: 1615; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1616; X64-AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0 1617; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1618; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1619; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1620; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1621; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1622; X64-AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 1623; X64-AVX512-NEXT: vmovd %xmm0, %eax 1624; X64-AVX512-NEXT: vzeroupper 1625; X64-AVX512-NEXT: retq 1626 %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1627 %2 = icmp ugt <16 x i32> %a0, %1 1628 %3 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1 1629 %4 = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1630 %5 = icmp ugt <16 x i32> %3, %4 1631 %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 1632 %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1633 %8 = icmp ugt <16 x i32> %6, %7 1634 %9 = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7 1635 %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1636 %11 = icmp ugt <16 x i32> %9, %10 1637 %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10 1638 %13 = extractelement <16 x i32> %12, i32 0 1639 ret i32 %13 1640} 1641 1642define i16 @test_reduce_v32i16(<32 x i16> %a0) { 1643; X86-SSE2-LABEL: test_reduce_v32i16: 1644; X86-SSE2: ## %bb.0: 1645; X86-SSE2-NEXT: psubusw %xmm0, %xmm2 1646; X86-SSE2-NEXT: paddw %xmm0, %xmm2 1647; X86-SSE2-NEXT: psubusw %xmm1, %xmm3 1648; X86-SSE2-NEXT: paddw %xmm1, %xmm3 1649; X86-SSE2-NEXT: psubusw %xmm2, %xmm3 1650; X86-SSE2-NEXT: paddw %xmm2, %xmm3 1651; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3] 1652; X86-SSE2-NEXT: psubusw %xmm3, %xmm0 1653; X86-SSE2-NEXT: paddw %xmm3, %xmm0 1654; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1655; X86-SSE2-NEXT: psubusw %xmm0, %xmm1 1656; X86-SSE2-NEXT: paddw %xmm0, %xmm1 1657; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1658; X86-SSE2-NEXT: psrld $16, %xmm0 1659; X86-SSE2-NEXT: psubusw %xmm1, %xmm0 1660; X86-SSE2-NEXT: paddw %xmm1, %xmm0 1661; X86-SSE2-NEXT: movd %xmm0, %eax 1662; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1663; X86-SSE2-NEXT: retl 1664; 1665; X86-SSE42-LABEL: test_reduce_v32i16: 1666; X86-SSE42: ## %bb.0: 1667; X86-SSE42-NEXT: pmaxuw %xmm3, %xmm1 1668; X86-SSE42-NEXT: pmaxuw %xmm2, %xmm1 1669; X86-SSE42-NEXT: pmaxuw %xmm0, %xmm1 1670; X86-SSE42-NEXT: pcmpeqd %xmm0, %xmm0 1671; X86-SSE42-NEXT: pxor %xmm1, %xmm0 1672; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1673; X86-SSE42-NEXT: movd %xmm0, %eax 1674; X86-SSE42-NEXT: notl %eax 1675; 
X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1676; X86-SSE42-NEXT: retl 1677; 1678; X86-AVX1-LABEL: test_reduce_v32i16: 1679; X86-AVX1: ## %bb.0: 1680; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1681; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1682; X86-AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2 1683; X86-AVX1-NEXT: vpmaxuw %xmm2, %xmm1, %xmm1 1684; X86-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 1685; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1686; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1687; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1688; X86-AVX1-NEXT: vmovd %xmm0, %eax 1689; X86-AVX1-NEXT: notl %eax 1690; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 1691; X86-AVX1-NEXT: vzeroupper 1692; X86-AVX1-NEXT: retl 1693; 1694; X86-AVX2-LABEL: test_reduce_v32i16: 1695; X86-AVX2: ## %bb.0: 1696; X86-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 1697; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1698; X86-AVX2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 1699; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1700; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1701; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1702; X86-AVX2-NEXT: vmovd %xmm0, %eax 1703; X86-AVX2-NEXT: notl %eax 1704; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 1705; X86-AVX2-NEXT: vzeroupper 1706; X86-AVX2-NEXT: retl 1707; 1708; X64-SSE2-LABEL: test_reduce_v32i16: 1709; X64-SSE2: ## %bb.0: 1710; X64-SSE2-NEXT: psubusw %xmm0, %xmm2 1711; X64-SSE2-NEXT: paddw %xmm0, %xmm2 1712; X64-SSE2-NEXT: psubusw %xmm1, %xmm3 1713; X64-SSE2-NEXT: paddw %xmm1, %xmm3 1714; X64-SSE2-NEXT: psubusw %xmm2, %xmm3 1715; X64-SSE2-NEXT: paddw %xmm2, %xmm3 1716; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3] 1717; X64-SSE2-NEXT: psubusw %xmm3, %xmm0 1718; X64-SSE2-NEXT: paddw %xmm3, %xmm0 1719; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1720; X64-SSE2-NEXT: psubusw %xmm0, %xmm1 1721; X64-SSE2-NEXT: paddw %xmm0, %xmm1 1722; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1723; X64-SSE2-NEXT: psrld $16, %xmm0 1724; X64-SSE2-NEXT: 
psubusw %xmm1, %xmm0 1725; X64-SSE2-NEXT: paddw %xmm1, %xmm0 1726; X64-SSE2-NEXT: movd %xmm0, %eax 1727; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1728; X64-SSE2-NEXT: retq 1729; 1730; X64-SSE42-LABEL: test_reduce_v32i16: 1731; X64-SSE42: ## %bb.0: 1732; X64-SSE42-NEXT: pmaxuw %xmm3, %xmm1 1733; X64-SSE42-NEXT: pmaxuw %xmm2, %xmm1 1734; X64-SSE42-NEXT: pmaxuw %xmm0, %xmm1 1735; X64-SSE42-NEXT: pcmpeqd %xmm0, %xmm0 1736; X64-SSE42-NEXT: pxor %xmm1, %xmm0 1737; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1738; X64-SSE42-NEXT: movd %xmm0, %eax 1739; X64-SSE42-NEXT: notl %eax 1740; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1741; X64-SSE42-NEXT: retq 1742; 1743; X64-AVX1-LABEL: test_reduce_v32i16: 1744; X64-AVX1: ## %bb.0: 1745; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1746; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1747; X64-AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2 1748; X64-AVX1-NEXT: vpmaxuw %xmm2, %xmm1, %xmm1 1749; X64-AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 1750; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1751; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1752; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1753; X64-AVX1-NEXT: vmovd %xmm0, %eax 1754; X64-AVX1-NEXT: notl %eax 1755; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 1756; X64-AVX1-NEXT: vzeroupper 1757; X64-AVX1-NEXT: retq 1758; 1759; X64-AVX2-LABEL: test_reduce_v32i16: 1760; X64-AVX2: ## %bb.0: 1761; X64-AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 1762; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1763; X64-AVX2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 1764; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1765; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1766; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1767; X64-AVX2-NEXT: vmovd %xmm0, %eax 1768; X64-AVX2-NEXT: notl %eax 1769; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 1770; X64-AVX2-NEXT: vzeroupper 1771; X64-AVX2-NEXT: retq 1772; 1773; X64-AVX512-LABEL: test_reduce_v32i16: 1774; X64-AVX512: ## %bb.0: 1775; X64-AVX512-NEXT: 
vextracti64x4 $1, %zmm0, %ymm1 1776; X64-AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 1777; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1778; X64-AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 1779; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 1780; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1781; X64-AVX512-NEXT: vmovd %xmm0, %eax 1782; X64-AVX512-NEXT: notl %eax 1783; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 1784; X64-AVX512-NEXT: vzeroupper 1785; X64-AVX512-NEXT: retq 1786 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1787 %2 = icmp ugt <32 x i16> %a0, %1 1788 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1 1789 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1790 %5 = icmp ugt <32 x i16> %3, %4 1791 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4 1792 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1793 %8 = icmp ugt <32 x i16> %6, %7 1794 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7 1795 %10 = shufflevector <32 x i16> %9, 
<32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1796 %11 = icmp ugt <32 x i16> %9, %10 1797 %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10 1798 %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1799 %14 = icmp ugt <32 x i16> %12, %13 1800 %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13 1801 %16 = extractelement <32 x i16> %15, i32 0 1802 ret i16 %16 1803} 1804 1805define i8 @test_reduce_v64i8(<64 x i8> %a0) { 1806; X86-SSE2-LABEL: test_reduce_v64i8: 1807; X86-SSE2: ## %bb.0: 1808; X86-SSE2-NEXT: pmaxub %xmm3, %xmm1 1809; X86-SSE2-NEXT: pmaxub %xmm2, %xmm1 1810; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1 1811; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1812; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0 1813; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1814; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1 1815; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1816; X86-SSE2-NEXT: psrld $16, %xmm0 1817; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0 1818; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1819; X86-SSE2-NEXT: psrlw $8, %xmm1 1820; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1 1821; X86-SSE2-NEXT: movd %xmm1, %eax 1822; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1823; X86-SSE2-NEXT: retl 1824; 1825; X86-SSE42-LABEL: test_reduce_v64i8: 1826; X86-SSE42: ## %bb.0: 1827; X86-SSE42-NEXT: pmaxub %xmm3, %xmm1 1828; 
X86-SSE42-NEXT: pmaxub %xmm2, %xmm1 1829; X86-SSE42-NEXT: pmaxub %xmm0, %xmm1 1830; X86-SSE42-NEXT: pcmpeqd %xmm0, %xmm0 1831; X86-SSE42-NEXT: pxor %xmm1, %xmm0 1832; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 1833; X86-SSE42-NEXT: psrlw $8, %xmm1 1834; X86-SSE42-NEXT: pminub %xmm0, %xmm1 1835; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 1836; X86-SSE42-NEXT: movd %xmm0, %eax 1837; X86-SSE42-NEXT: notb %al 1838; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1839; X86-SSE42-NEXT: retl 1840; 1841; X86-AVX1-LABEL: test_reduce_v64i8: 1842; X86-AVX1: ## %bb.0: 1843; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1844; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1845; X86-AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2 1846; X86-AVX1-NEXT: vpmaxub %xmm2, %xmm1, %xmm1 1847; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1848; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1849; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1850; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1851; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1852; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1853; X86-AVX1-NEXT: vmovd %xmm0, %eax 1854; X86-AVX1-NEXT: notb %al 1855; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1856; X86-AVX1-NEXT: vzeroupper 1857; X86-AVX1-NEXT: retl 1858; 1859; X86-AVX2-LABEL: test_reduce_v64i8: 1860; X86-AVX2: ## %bb.0: 1861; X86-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 1862; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1863; X86-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1864; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1865; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1866; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 1867; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1868; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1869; X86-AVX2-NEXT: vmovd %xmm0, %eax 1870; X86-AVX2-NEXT: notb %al 1871; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1872; X86-AVX2-NEXT: vzeroupper 1873; X86-AVX2-NEXT: retl 1874; 1875; X64-SSE2-LABEL: test_reduce_v64i8: 1876; X64-SSE2: ## %bb.0: 1877; X64-SSE2-NEXT: pmaxub %xmm3, %xmm1 
1878; X64-SSE2-NEXT: pmaxub %xmm2, %xmm1 1879; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1 1880; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1881; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0 1882; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1883; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1 1884; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1885; X64-SSE2-NEXT: psrld $16, %xmm0 1886; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0 1887; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1888; X64-SSE2-NEXT: psrlw $8, %xmm1 1889; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1 1890; X64-SSE2-NEXT: movd %xmm1, %eax 1891; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1892; X64-SSE2-NEXT: retq 1893; 1894; X64-SSE42-LABEL: test_reduce_v64i8: 1895; X64-SSE42: ## %bb.0: 1896; X64-SSE42-NEXT: pmaxub %xmm3, %xmm1 1897; X64-SSE42-NEXT: pmaxub %xmm2, %xmm1 1898; X64-SSE42-NEXT: pmaxub %xmm0, %xmm1 1899; X64-SSE42-NEXT: pcmpeqd %xmm0, %xmm0 1900; X64-SSE42-NEXT: pxor %xmm1, %xmm0 1901; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 1902; X64-SSE42-NEXT: psrlw $8, %xmm1 1903; X64-SSE42-NEXT: pminub %xmm0, %xmm1 1904; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 1905; X64-SSE42-NEXT: movd %xmm0, %eax 1906; X64-SSE42-NEXT: notb %al 1907; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1908; X64-SSE42-NEXT: retq 1909; 1910; X64-AVX1-LABEL: test_reduce_v64i8: 1911; X64-AVX1: ## %bb.0: 1912; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1913; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1914; X64-AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2 1915; X64-AVX1-NEXT: vpmaxub %xmm2, %xmm1, %xmm1 1916; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1917; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1918; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 1919; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1920; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1921; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1922; X64-AVX1-NEXT: vmovd %xmm0, %eax 1923; X64-AVX1-NEXT: notb %al 1924; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1925; X64-AVX1-NEXT: vzeroupper 1926; X64-AVX1-NEXT: 
retq 1927; 1928; X64-AVX2-LABEL: test_reduce_v64i8: 1929; X64-AVX2: ## %bb.0: 1930; X64-AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 1931; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1932; X64-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1933; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 1934; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 1935; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 1936; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1937; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1938; X64-AVX2-NEXT: vmovd %xmm0, %eax 1939; X64-AVX2-NEXT: notb %al 1940; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1941; X64-AVX2-NEXT: vzeroupper 1942; X64-AVX2-NEXT: retq 1943; 1944; X64-AVX512-LABEL: test_reduce_v64i8: 1945; X64-AVX512: ## %bb.0: 1946; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1947; X64-AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 1948; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1949; X64-AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 1950; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 1951; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 1952; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 1953; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1954; X64-AVX512-NEXT: vmovd %xmm0, %eax 1955; X64-AVX512-NEXT: notb %al 1956; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 1957; X64-AVX512-NEXT: vzeroupper 1958; X64-AVX512-NEXT: retq 1959 %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef> 1960 %2 = icmp ugt <64 x i8> %a0, %1 1961 %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1 1962 %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1963 %5 = icmp ugt <64 x i8> %3, %4 1964 %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4 1965 %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1966 %8 = icmp ugt <64 x i8> %6, %7 1967 %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7 1968 %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1969 %11 = icmp ugt <64 x i8> %9, %10 1970 %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10 1971 %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1972 %14 = icmp ugt <64 x i8> %12, %13 1973 %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13 1974 %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1975 %17 = icmp ugt <64 x i8> %15, %16 1976 %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16 1977 %19 = extractelement <64 x i8> %18, i32 0 1978 ret i8 %19 1979} 1980 1981; 1982; Partial Vector Reductions 1983; 1984 1985define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) { 1986; X86-SSE2-LABEL: test_reduce_v16i16_v8i16: 1987; X86-SSE2: ## %bb.0: 1988; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1989; X86-SSE2-NEXT: psubusw %xmm0, %xmm1 1990; X86-SSE2-NEXT: paddw %xmm0, %xmm1 1991; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1992; X86-SSE2-NEXT: psubusw %xmm1, %xmm0 1993; X86-SSE2-NEXT: paddw %xmm1, %xmm0 1994; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1995; X86-SSE2-NEXT: psrld $16, %xmm1 1996; X86-SSE2-NEXT: psubusw %xmm0, %xmm1 1997; X86-SSE2-NEXT: paddw %xmm0, %xmm1 1998; X86-SSE2-NEXT: movd %xmm1, %eax 1999; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 2000; X86-SSE2-NEXT: retl 2001; 2002; X86-SSE42-LABEL: test_reduce_v16i16_v8i16: 2003; X86-SSE42: ## %bb.0: 2004; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 2005; X86-SSE42-NEXT: pxor %xmm0, %xmm1 2006; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 2007; X86-SSE42-NEXT: movd %xmm0, %eax 2008; X86-SSE42-NEXT: notl %eax 2009; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 2010; X86-SSE42-NEXT: retl 2011; 2012; X86-AVX-LABEL: test_reduce_v16i16_v8i16: 2013; X86-AVX: ## %bb.0: 2014; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 2015; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 2016; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 2017; X86-AVX-NEXT: vmovd %xmm0, %eax 2018; X86-AVX-NEXT: notl %eax 2019; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 2020; X86-AVX-NEXT: vzeroupper 2021; X86-AVX-NEXT: retl 2022; 2023; X64-SSE2-LABEL: 
test_reduce_v16i16_v8i16: 2024; X64-SSE2: ## %bb.0: 2025; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2026; X64-SSE2-NEXT: psubusw %xmm0, %xmm1 2027; X64-SSE2-NEXT: paddw %xmm0, %xmm1 2028; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 2029; X64-SSE2-NEXT: psubusw %xmm1, %xmm0 2030; X64-SSE2-NEXT: paddw %xmm1, %xmm0 2031; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 2032; X64-SSE2-NEXT: psrld $16, %xmm1 2033; X64-SSE2-NEXT: psubusw %xmm0, %xmm1 2034; X64-SSE2-NEXT: paddw %xmm0, %xmm1 2035; X64-SSE2-NEXT: movd %xmm1, %eax 2036; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 2037; X64-SSE2-NEXT: retq 2038; 2039; X64-SSE42-LABEL: test_reduce_v16i16_v8i16: 2040; X64-SSE42: ## %bb.0: 2041; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 2042; X64-SSE42-NEXT: pxor %xmm0, %xmm1 2043; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 2044; X64-SSE42-NEXT: movd %xmm0, %eax 2045; X64-SSE42-NEXT: notl %eax 2046; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 2047; X64-SSE42-NEXT: retq 2048; 2049; X64-AVX1-LABEL: test_reduce_v16i16_v8i16: 2050; X64-AVX1: ## %bb.0: 2051; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 2052; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 2053; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 2054; X64-AVX1-NEXT: vmovd %xmm0, %eax 2055; X64-AVX1-NEXT: notl %eax 2056; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 2057; X64-AVX1-NEXT: vzeroupper 2058; X64-AVX1-NEXT: retq 2059; 2060; X64-AVX2-LABEL: test_reduce_v16i16_v8i16: 2061; X64-AVX2: ## %bb.0: 2062; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 2063; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 2064; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 2065; X64-AVX2-NEXT: vmovd %xmm0, %eax 2066; X64-AVX2-NEXT: notl %eax 2067; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 2068; X64-AVX2-NEXT: vzeroupper 2069; X64-AVX2-NEXT: retq 2070; 2071; X64-AVX512-LABEL: test_reduce_v16i16_v8i16: 2072; X64-AVX512: ## %bb.0: 2073; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 2074; X64-AVX512-NEXT: vphminposuw %xmm0, 
%xmm0 2075; X64-AVX512-NEXT: vmovd %xmm0, %eax 2076; X64-AVX512-NEXT: notl %eax 2077; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 2078; X64-AVX512-NEXT: vzeroupper 2079; X64-AVX512-NEXT: retq 2080 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2081 %2 = icmp ugt <16 x i16> %a0, %1 2082 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1 2083 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2084 %5 = icmp ugt <16 x i16> %3, %4 2085 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 2086 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 2087 %8 = icmp ugt <16 x i16> %6, %7 2088 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 2089 %10 = extractelement <16 x i16> %9, i32 0 2090 ret i16 %10 2091} 2092 2093define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) { 2094; X86-SSE2-LABEL: test_reduce_v32i16_v8i16: 2095; X86-SSE2: ## %bb.0: 2096; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2097; X86-SSE2-NEXT: psubusw %xmm0, %xmm1 2098; X86-SSE2-NEXT: paddw %xmm0, %xmm1 2099; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 2100; X86-SSE2-NEXT: psubusw %xmm1, %xmm0 2101; X86-SSE2-NEXT: paddw %xmm1, %xmm0 2102; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 2103; X86-SSE2-NEXT: psrld $16, %xmm1 2104; X86-SSE2-NEXT: psubusw %xmm0, %xmm1 2105; X86-SSE2-NEXT: paddw %xmm0, %xmm1 2106; X86-SSE2-NEXT: movd %xmm1, %eax 2107; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 2108; X86-SSE2-NEXT: retl 2109; 2110; 
X86-SSE42-LABEL: test_reduce_v32i16_v8i16:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    notl %eax
; X86-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    notl %eax
; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i16_v8i16:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X64-SSE2-NEXT:    paddw %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    psubusw %xmm1, %xmm0
; X64-SSE2-NEXT:    paddw %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    psubusw %xmm0, %xmm1
; X64-SSE2-NEXT:    paddw %xmm0, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i16_v8i16:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    notl %eax
; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v32i16_v8i16:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    notl %eax
; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v32i16_v8i16:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    notl %eax
; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v32i16_v8i16:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    notl %eax
; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
; NOTE(review): the IR below is a umax reduction over the low 8 of 32 x i16,
; written as a shuffle + icmp ugt + select halving tree (8 -> 4 -> 2 -> 1).
; SSE4.2+ lowers it via max(x) == ~min(~x): invert (pxor all-ones, or
; vpternlogq $15 which computes bitwise NOT), take phminposuw, invert again.
  %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ugt <32 x i16> %a0, %1
  %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
  %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ugt <32 x i16> %3, %4
  %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
  %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ugt <32 x i16> %6, %7
  %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
  %10 = extractelement <32 x i16> %9, i32 0
  ret i16 %10
}

; umax reduction of elements 0..15 of a <32 x i8>, returned as i8.
; Same halving-tree pattern as above (16 -> 8 -> 4 -> 2 -> 1); on SSE4.2+
; the last i8 step is a pminub of the inverted vector against its own
; bytes shifted right by 8, then phminposuw on the resulting i16 lanes.
define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i8_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v32i8_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    psrlw $8, %xmm0
; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    notb %al
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    notb %al
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v32i8_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v32i8_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    psrlw $8, %xmm0
; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    notb %al
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v32i8_v16i8:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    notb %al
; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v32i8_v16i8:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    notb %al
; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v32i8_v16i8:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    notb %al
; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ugt <32 x i8> %a0, %1
  %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ugt <32 x i8> %3, %4
  %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
  %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ugt <32 x i8> %6, %7
  %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ugt <32 x i8> %9, %10
  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
  %13 = extractelement <32 x i8> %12, i32 0
  ret i8 %13
}

; umax reduction of elements 0..15 of a <64 x i8>, returned as i8.
; Only the low 128 bits participate, so codegen matches the v32i8 case.
define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $16, %xmm1
; X86-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X86-SSE2-NEXT:    movd %xmm0, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X86-SSE42-NEXT:    pxor %xmm0, %xmm1
; X86-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE42-NEXT:    psrlw $8, %xmm0
; X86-SSE42-NEXT:    pminub %xmm1, %xmm0
; X86-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    notb %al
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    notb %al
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    psrld $16, %xmm1
; X64-SSE2-NEXT:    pmaxub %xmm0, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    pmaxub %xmm1, %xmm0
; X64-SSE2-NEXT:    movd %xmm0, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pcmpeqd %xmm1, %xmm1
; X64-SSE42-NEXT:    pxor %xmm0, %xmm1
; X64-SSE42-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE42-NEXT:    psrlw $8, %xmm0
; X64-SSE42-NEXT:    pminub %xmm1, %xmm0
; X64-SSE42-NEXT:    phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    notb %al
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX1-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT:    vmovd %xmm0, %eax
; X64-AVX1-NEXT:    notb %al
; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT:    vzeroupper
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT:    vmovd %xmm0, %eax
; X64-AVX2-NEXT:    notb %al
; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT:    vmovd %xmm0, %eax
; X64-AVX512-NEXT:    notb %al
; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT:    vzeroupper
; X64-AVX512-NEXT:    retq
  %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ugt <64 x i8> %a0, %1
  %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ugt <64 x i8> %3, %4
  %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ugt <64 x i8> %6, %7
  %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ugt <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  %13 = extractelement <64 x i8> %12, i32 0
  ret i8 %13
}