; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X86-SSE42
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE42
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512

;
; 128-bit Vectors
;

define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v2i64:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
; X86-SSE2-NEXT: pxor %xmm1, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm5, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT: por %xmm2, %xmm3
; X86-SSE2-NEXT: pand %xmm3, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm3
; X86-SSE2-NEXT: por %xmm0, %xmm3
; X86-SSE2-NEXT: movd %xmm3, %eax
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
; X86-SSE2-NEXT: movd %xmm0, %edx
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v2i64:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm0 = [0,2147483648,0,2147483648]
; X86-SSE42-NEXT: movdqa %xmm1, %xmm3
; X86-SSE42-NEXT: pxor %xmm0, %xmm3
; X86-SSE42-NEXT: pxor %xmm2, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT: movd %xmm2, %eax
; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v2i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X86-AVX1-NEXT: ## xmm2 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v2i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v2i64:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
; X64-SSE2-NEXT: pxor %xmm1, %xmm2
; X64-SSE2-NEXT: movdqa %xmm2, %xmm4
; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm5, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT: por %xmm2, %xmm3
; X64-SSE2-NEXT: pand %xmm3, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm3
; X64-SSE2-NEXT: por %xmm0, %xmm3
; X64-SSE2-NEXT: movq %xmm3, %rax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v2i64:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; X64-SSE42-NEXT: movdqa %xmm1, %xmm3
; X64-SSE42-NEXT: pxor %xmm0, %xmm3
; X64-SSE42-NEXT: pxor %xmm2, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT: movq %xmm2, %rax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v2i64:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X64-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X64-AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v2i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v2i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: retq
  %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %2 = icmp ult <2 x i64> %a0, %1
  %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1
  %4 = extractelement <2 x i64> %3, i32 0
  ret i64 %4
}
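;
; Note: every test in this file reduces with the same shuffle/compare/select
; ladder; one halving step looks roughly like this (an illustrative sketch,
; not text that FileCheck matches):
;   %hi  = shufflevector <2 x i64> %v, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
;   %cmp = icmp ult <2 x i64> %v, %hi
;   %min = select <2 x i1> %cmp, <2 x i64> %v, <2 x i64> %hi
; Pre-SSE4.2 targets have no unsigned vector compares and no 64-bit PCMPGT at
; all, so the SSE2 lowering above flips the sign bit (PXOR with a
; 0x8000000000000000 splat) and rebuilds the unsigned 64-bit compare from
; signed PCMPGTD plus PCMPEQD; on 32-bit x86 the i64 result is then returned
; in EDX:EAX via the MOVD/PEXTRD pair.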

define i32 @test_reduce_v4i32(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i32:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
; X86-SSE2-NEXT: movdqa %xmm1, %xmm4
; X86-SSE2-NEXT: pxor %xmm2, %xmm4
; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X86-SSE2-NEXT: pand %xmm4, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm4
; X86-SSE2-NEXT: por %xmm0, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,1,1]
; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pxor %xmm0, %xmm2
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm4
; X86-SSE2-NEXT: pandn %xmm0, %xmm2
; X86-SSE2-NEXT: por %xmm4, %xmm2
; X86-SSE2-NEXT: movd %xmm2, %eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v4i32:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE42-NEXT: pminud %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE42-NEXT: pminud %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v4i32:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v4i32:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
; X64-SSE2-NEXT: movdqa %xmm1, %xmm4
; X64-SSE2-NEXT: pxor %xmm2, %xmm4
; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT: pand %xmm4, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm4
; X64-SSE2-NEXT: por %xmm0, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,1,1]
; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pxor %xmm0, %xmm2
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm4
; X64-SSE2-NEXT: pandn %xmm0, %xmm2
; X64-SSE2-NEXT: por %xmm4, %xmm2
; X64-SSE2-NEXT: movd %xmm2, %eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v4i32:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE42-NEXT: pminud %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE42-NEXT: pminud %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v4i32:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp ult <4 x i32> %a0, %1
  %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <4 x i32> %3, %4
  %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4
  %7 = extractelement <4 x i32> %6, i32 0
  ret i32 %7
}
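;
; PMINUD is an SSE4.1 instruction, so the SSE4.2 and AVX runs above reduce
; v4i32 with two PMINUD steps, while SSE2 falls back to the same sign-flipped
; PCMPGTD/PAND/PANDN/POR select sequence used for i64.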

define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i16:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: movd %xmm0, %eax
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v8i16:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: movd %xmm0, %eax
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT: retq
  %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <8 x i16> %a0, %1
  %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <8 x i16> %3, %4
  %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4
  %7 = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <8 x i16> %6, %7
  %9 = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7
  %10 = extractelement <8 x i16> %9, i32 0
  ret i16 %10
}
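;
; SSE2 has no PMINUW, but saturating subtraction supplies an unsigned i16
; min: umin(a, b) == a - usub.sat(a, b), since the saturating difference is 0
; when a <= b and a - b otherwise. That identity is what each PSUBUSW/PSUBW
; pair above implements; SSE4.1 and later instead reduce all eight words at
; once with PHMINPOSUW, which leaves the minimum in element 0.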

define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i8:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: psrlw $8, %xmm0
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
; X86-SSE2-NEXT: movd %xmm0, %eax
; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v16i8:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
; X64-SSE2-NEXT: psrlw $8, %xmm0
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
; X64-SSE2-NEXT: movd %xmm0, %eax
; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: retq
  %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <16 x i8> %a0, %1
  %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <16 x i8> %3, %4
  %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4
  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <16 x i8> %6, %7
  %9 = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7
  %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <16 x i8> %9, %10
  %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10
  %13 = extractelement <16 x i8> %12, i32 0
  ret i8 %13
}
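;
; For bytes, PSRLW $8 moves the high byte of every word into the low byte
; position (zero-extended), so PMINUB of a vector with its shifted self
; leaves min(low, high) zero-extended in each word and PHMINPOSUW can finish
; the reduction. PMINUB itself dates back to SSE2, which is why even the
; SSE2 byte reduction needs no sign-flip compares.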

;
; 256-bit Vectors
;

define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i64:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
; X86-SSE2-NEXT: movdqa %xmm1, %xmm4
; X86-SSE2-NEXT: pxor %xmm2, %xmm4
; X86-SSE2-NEXT: movdqa %xmm4, %xmm5
; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm6, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X86-SSE2-NEXT: por %xmm3, %xmm4
; X86-SSE2-NEXT: pand %xmm4, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm4
; X86-SSE2-NEXT: por %xmm0, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pxor %xmm0, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm5, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X86-SSE2-NEXT: por %xmm1, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm4
; X86-SSE2-NEXT: pandn %xmm0, %xmm2
; X86-SSE2-NEXT: por %xmm4, %xmm2
; X86-SSE2-NEXT: movd %xmm2, %eax
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT: movd %xmm0, %edx
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v4i64:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
; X86-SSE42-NEXT: movdqa %xmm0, %xmm4
; X86-SSE42-NEXT: pxor %xmm3, %xmm4
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
; X86-SSE42-NEXT: pxor %xmm3, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm4, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
; X86-SSE42-NEXT: pxor %xmm3, %xmm0
; X86-SSE42-NEXT: pxor %xmm2, %xmm3
; X86-SSE42-NEXT: pcmpgtq %xmm0, %xmm3
; X86-SSE42-NEXT: movdqa %xmm3, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT: movd %xmm2, %eax
; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v4i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; X86-AVX1-NEXT: ## xmm1 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vxorps %xmm1, %xmm3, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vxorpd %xmm1, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorpd %xmm1, %xmm2, %xmm1
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v4i64:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
; X64-SSE2-NEXT: movdqa %xmm1, %xmm4
; X64-SSE2-NEXT: pxor %xmm2, %xmm4
; X64-SSE2-NEXT: movdqa %xmm4, %xmm5
; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm6, %xmm3
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X64-SSE2-NEXT: por %xmm3, %xmm4
; X64-SSE2-NEXT: pand %xmm4, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm4
; X64-SSE2-NEXT: por %xmm0, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pxor %xmm0, %xmm2
; X64-SSE2-NEXT: movdqa %xmm2, %xmm3
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm5, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X64-SSE2-NEXT: por %xmm1, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm4
; X64-SSE2-NEXT: pandn %xmm0, %xmm2
; X64-SSE2-NEXT: por %xmm4, %xmm2
; X64-SSE2-NEXT: movq %xmm2, %rax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v4i64:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; X64-SSE42-NEXT: movdqa %xmm0, %xmm4
; X64-SSE42-NEXT: pxor %xmm3, %xmm4
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
; X64-SSE42-NEXT: pxor %xmm3, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm4, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
; X64-SSE42-NEXT: pxor %xmm3, %xmm0
; X64-SSE42-NEXT: pxor %xmm2, %xmm3
; X64-SSE42-NEXT: pcmpgtq %xmm0, %xmm3
; X64-SSE42-NEXT: movdqa %xmm3, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT: movq %xmm2, %rax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v4i64:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT: vpxor %xmm1, %xmm3, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vxorpd %xmm1, %xmm0, %xmm3
; X64-AVX1-NEXT: vxorpd %xmm1, %xmm2, %xmm1
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v4i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v4i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp ult <4 x i64> %a0, %1
  %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1
  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <4 x i64> %3, %4
  %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4
  %7 = extractelement <4 x i64> %6, i32 0
  ret i64 %7
}
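;
; The 256-bit tests first fold the upper 128-bit lane into the lower one
; (VEXTRACTF128/VEXTRACTI128) and then reuse the 128-bit ladder. AVX1 has no
; 256-bit integer arithmetic, so all of its work happens in XMM registers;
; AVX-512VL collapses each v4i64 halving step into a single VPMINUQ.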

define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i32:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
; X86-SSE2-NEXT: movdqa %xmm1, %xmm4
; X86-SSE2-NEXT: pxor %xmm2, %xmm4
; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X86-SSE2-NEXT: pand %xmm4, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm4
; X86-SSE2-NEXT: por %xmm0, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X86-SSE2-NEXT: pand %xmm3, %xmm4
; X86-SSE2-NEXT: pandn %xmm0, %xmm3
; X86-SSE2-NEXT: por %xmm4, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
; X86-SSE2-NEXT: movdqa %xmm3, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pxor %xmm0, %xmm2
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm3
; X86-SSE2-NEXT: pandn %xmm0, %xmm2
; X86-SSE2-NEXT: por %xmm3, %xmm2
; X86-SSE2-NEXT: movd %xmm2, %eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v8i32:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminud %xmm1, %xmm0
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE42-NEXT: pminud %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE42-NEXT: pminud %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v8i32:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v8i32:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v8i32:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
; X64-SSE2-NEXT: movdqa %xmm1, %xmm4
; X64-SSE2-NEXT: pxor %xmm2, %xmm4
; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT: pand %xmm4, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm4
; X64-SSE2-NEXT: por %xmm0, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X64-SSE2-NEXT: pand %xmm3, %xmm4
; X64-SSE2-NEXT: pandn %xmm0, %xmm3
; X64-SSE2-NEXT: por %xmm4, %xmm3
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
; X64-SSE2-NEXT: movdqa %xmm3, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pxor %xmm0, %xmm2
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm3
; X64-SSE2-NEXT: pandn %xmm0, %xmm2
; X64-SSE2-NEXT: por %xmm3, %xmm2
; X64-SSE2-NEXT: movd %xmm2, %eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v8i32:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminud %xmm1, %xmm0
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE42-NEXT: pminud %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE42-NEXT: pminud %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v8i32:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v8i32:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v8i32:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <8 x i32> %a0, %1
  %3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1
  %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <8 x i32> %3, %4
  %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4
  %7 = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <8 x i32> %6, %7
  %9 = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7
  %10 = extractelement <8 x i32> %9, i32 0
  ret i32 %10
}
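;
; With PMINUD available, v8i32 is just lane folding plus two PMINUD steps.
; Every AVX variant ends with VZEROUPPER once YMM state has been touched, to
; avoid AVX-to-SSE transition penalties on return.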

define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i16:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: movd %xmm0, %eax
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v16i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminuw %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v16i16:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v16i16:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v16i16:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: movd %xmm0, %eax
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v16i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminuw %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v16i16:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v16i16:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v16i16:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <16 x i16> %a0, %1
  %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
  %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <16 x i16> %3, %4
  %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
  %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <16 x i16> %6, %7
  %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
  %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <16 x i16> %9, %10
  %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10
  %13 = extractelement <16 x i16> %12, i32 0
  ret i16 %13
}
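;
; v16i16 needs only one PMINUW to merge the halves before PHMINPOSUW; the
; "## kill" annotations record that only the AX subregister of EAX is live
; when an i16 is returned.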

define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i8:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: psrlw $8, %xmm0
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
; X86-SSE2-NEXT: movd %xmm0, %eax
; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v32i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v32i8:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v32i8:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v32i8:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
; X64-SSE2-NEXT: psrlw $8, %xmm0
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
; X64-SSE2-NEXT: movd %xmm0, %eax
; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v32i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v32i8:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v32i8:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v32i8:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <32 x i8> %a0, %1
  %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <32 x i8> %3, %4
  %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
  %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <32 x i8> %6, %7
  %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <32 x i8> %9, %10
  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
  %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp ult <32 x i8> %12, %13
  %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13
  %16 = extractelement <32 x i8> %15, i32 0
  ret i8 %16
}
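;
; v32i8 combines both byte tricks: fold the halves with PMINUB, pair
; adjacent bytes with PSRLW $8 + PMINUB, then finish with PHMINPOSUW.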

;
; 512-bit Vectors
;

define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i64:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT: movdqa %xmm1, %xmm5
; X86-SSE2-NEXT: pxor %xmm4, %xmm5
; X86-SSE2-NEXT: movdqa %xmm3, %xmm6
; X86-SSE2-NEXT: pxor %xmm4, %xmm6
; X86-SSE2-NEXT: movdqa %xmm6, %xmm7
; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm7
; X86-SSE2-NEXT: pcmpeqd %xmm5, %xmm6
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm5, %xmm6
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; X86-SSE2-NEXT: por %xmm6, %xmm5
; X86-SSE2-NEXT: pand %xmm5, %xmm1
; X86-SSE2-NEXT: pandn %xmm3, %xmm5
; X86-SSE2-NEXT: por %xmm1, %xmm5
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
; X86-SSE2-NEXT: pxor %xmm4, %xmm3
; X86-SSE2-NEXT: movdqa %xmm3, %xmm6
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm6
; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,0,2,2]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm1, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3]
; X86-SSE2-NEXT: por %xmm3, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm0
; X86-SSE2-NEXT: pandn %xmm2, %xmm1
; X86-SSE2-NEXT: por %xmm0, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: pxor %xmm4, %xmm0
; X86-SSE2-NEXT: movdqa %xmm5, %xmm2
; X86-SSE2-NEXT: pxor %xmm4, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
; X86-SSE2-NEXT: por %xmm2, %xmm0
; X86-SSE2-NEXT: pand %xmm0, %xmm1
; X86-SSE2-NEXT: pandn %xmm5, %xmm0
; X86-SSE2-NEXT: por %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: pxor %xmm4, %xmm2
; X86-SSE2-NEXT: pxor %xmm1, %xmm4
; X86-SSE2-NEXT: movdqa %xmm4, %xmm3
; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm3
; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm2, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X86-SSE2-NEXT: por %xmm4, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
; X86-SSE2-NEXT: por %xmm0, %xmm2
; X86-SSE2-NEXT: movd %xmm2, %eax
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT: movd %xmm0, %edx
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v8i64:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa %xmm0, %xmm5
; X86-SSE42-NEXT: movdqa {{.*#+}} xmm4 = [0,2147483648,0,2147483648]
; X86-SSE42-NEXT: movdqa %xmm1, %xmm6
; X86-SSE42-NEXT: pxor %xmm4, %xmm6
; X86-SSE42-NEXT: movdqa %xmm3, %xmm0
; X86-SSE42-NEXT: pxor %xmm4, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm6, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
; X86-SSE42-NEXT: movdqa %xmm5, %xmm1
; X86-SSE42-NEXT: pxor %xmm4, %xmm1
; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
; X86-SSE42-NEXT: pxor %xmm4, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm5, %xmm2
; X86-SSE42-NEXT: movapd %xmm2, %xmm1
; X86-SSE42-NEXT: xorpd %xmm4, %xmm1
; X86-SSE42-NEXT: movapd %xmm3, %xmm0
; X86-SSE42-NEXT: xorpd %xmm4, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
; X86-SSE42-NEXT: movdqa %xmm3, %xmm0
; X86-SSE42-NEXT: pxor %xmm4, %xmm0
; X86-SSE42-NEXT: pxor %xmm1, %xmm4
; X86-SSE42-NEXT: pcmpgtq %xmm0, %xmm4
; X86-SSE42-NEXT: movdqa %xmm4, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
; X86-SSE42-NEXT: movd %xmm1, %eax
; X86-SSE42-NEXT: pextrd $1, %xmm1, %edx
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v8i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; X86-AVX1-NEXT: ## xmm3 = mem[0,0]
; X86-AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm4
; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; X86-AVX1-NEXT: vxorps %xmm3, %xmm5, %xmm6
; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
; X86-AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm6
; X86-AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm7
; X86-AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
; X86-AVX1-NEXT: vblendvpd %xmm6, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm1
; X86-AVX1-NEXT: vblendvpd %xmm4, %xmm2, %xmm5, %xmm2
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm2, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1
; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v8i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm4
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v8i64:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
; X64-SSE2-NEXT: movdqa %xmm1, %xmm5
; X64-SSE2-NEXT: pxor %xmm4, %xmm5
; X64-SSE2-NEXT: movdqa %xmm3, %xmm6
; X64-SSE2-NEXT: pxor %xmm4, %xmm6
; X64-SSE2-NEXT: movdqa %xmm6, %xmm7
; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm7
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm6
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm8, %xmm6
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; X64-SSE2-NEXT: por %xmm6, %xmm5
; X64-SSE2-NEXT: pand %xmm5, %xmm1
; X64-SSE2-NEXT: pandn %xmm3, %xmm5
; X64-SSE2-NEXT: por %xmm1, %xmm5
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: movdqa %xmm2, %xmm3
; X64-SSE2-NEXT: pxor %xmm4, %xmm3
; X64-SSE2-NEXT: movdqa %xmm3, %xmm6
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm6
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm3
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm7, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
; X64-SSE2-NEXT: por %xmm1, %xmm3
; X64-SSE2-NEXT: pand %xmm3, %xmm0
; X64-SSE2-NEXT: pandn %xmm2, %xmm3
; X64-SSE2-NEXT: por %xmm0, %xmm3
; X64-SSE2-NEXT: movdqa %xmm3, %xmm0
; X64-SSE2-NEXT: pxor %xmm4, %xmm0
; X64-SSE2-NEXT: movdqa %xmm5, %xmm1
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm6, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE2-NEXT: por %xmm0, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm3
; X64-SSE2-NEXT: pandn %xmm5, %xmm1
; X64-SSE2-NEXT: por %xmm3, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pxor %xmm4, %xmm2
; X64-SSE2-NEXT: pxor %xmm0, %xmm4
; X64-SSE2-NEXT: movdqa %xmm4, %xmm3
; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm5, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; X64-SSE2-NEXT: por %xmm2, %xmm3
; X64-SSE2-NEXT: pand %xmm3, %xmm1
; X64-SSE2-NEXT: pandn %xmm0, %xmm3
; X64-SSE2-NEXT: por %xmm1, %xmm3
; X64-SSE2-NEXT: movq %xmm3, %rax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v8i64:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa %xmm0, %xmm5
; X64-SSE42-NEXT: movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; X64-SSE42-NEXT: movdqa %xmm1, %xmm6
; X64-SSE42-NEXT: pxor %xmm4, %xmm6
; X64-SSE42-NEXT: movdqa %xmm3, %xmm0
; X64-SSE42-NEXT: pxor %xmm4, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm6, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
; X64-SSE42-NEXT: movdqa %xmm5, %xmm1
; X64-SSE42-NEXT: pxor %xmm4, %xmm1
; X64-SSE42-NEXT: movdqa %xmm2, %xmm0
; X64-SSE42-NEXT: pxor %xmm4, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm5, %xmm2
; X64-SSE42-NEXT: movapd %xmm2, %xmm1
; X64-SSE42-NEXT: xorpd %xmm4, %xmm1
; X64-SSE42-NEXT: movapd %xmm3, %xmm0
; X64-SSE42-NEXT: xorpd %xmm4, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
; X64-SSE42-NEXT: movdqa %xmm3, %xmm0
; X64-SSE42-NEXT: pxor %xmm4, %xmm0
; X64-SSE42-NEXT: pxor %xmm1, %xmm4
; X64-SSE42-NEXT: pcmpgtq %xmm0, %xmm4
; X64-SSE42-NEXT: movdqa %xmm4, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
; X64-SSE42-NEXT: movq %xmm1, %rax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v8i64:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; X64-AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm4
; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; X64-AVX1-NEXT: vpxor %xmm3, %xmm5, %xmm6
; X64-AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
; X64-AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm6
; X64-AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm7
; X64-AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
; X64-AVX1-NEXT: vblendvpd %xmm6, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm1
; X64-AVX1-NEXT: vblendvpd %xmm4, %xmm2, %xmm5, %xmm2
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm2, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1
; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v8i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; X64-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm4
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v8i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <8 x i64> %a0, %1
  %3 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1
  %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <8 x i64> %3, %4
  %6 = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4
  %7 = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <8 x i64> %6, %7
  %9 = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7
  %10 = extractelement <8 x i64> %9, i32 0
  ret i64 %10
}
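;
; The 512-bit tests keep halving: AVX-512 folds ZMM to YMM to XMM with
; VPMINUQ, while the pre-AVX-512 runs replay the same sign-flip compare
; ladder (or, below, the PMINUD chain) across the extra register pairs.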
define i32 @test_reduce_v16i32(<16 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i32:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; X86-SSE2-NEXT: movdqa %xmm1, %xmm5
; X86-SSE2-NEXT: pxor %xmm4, %xmm5
; X86-SSE2-NEXT: movdqa %xmm3, %xmm6
; X86-SSE2-NEXT: pxor %xmm4, %xmm6
; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm6
; X86-SSE2-NEXT: pand %xmm6, %xmm1
; X86-SSE2-NEXT: pandn %xmm3, %xmm6
; X86-SSE2-NEXT: por %xmm1, %xmm6
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
; X86-SSE2-NEXT: pxor %xmm4, %xmm3
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X86-SSE2-NEXT: pand %xmm3, %xmm0
; X86-SSE2-NEXT: pandn %xmm2, %xmm3
; X86-SSE2-NEXT: por %xmm0, %xmm3
; X86-SSE2-NEXT: movdqa %xmm3, %xmm0
; X86-SSE2-NEXT: pxor %xmm4, %xmm0
; X86-SSE2-NEXT: movdqa %xmm6, %xmm1
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm3
; X86-SSE2-NEXT: pandn %xmm6, %xmm1
; X86-SSE2-NEXT: por %xmm3, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: pxor %xmm4, %xmm2
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: pxor %xmm4, %xmm3
; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm3
; X86-SSE2-NEXT: pand %xmm3, %xmm1
; X86-SSE2-NEXT: pandn %xmm0, %xmm3
; X86-SSE2-NEXT: por %xmm1, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
; X86-SSE2-NEXT: movdqa %xmm3, %xmm1
; X86-SSE2-NEXT: pxor %xmm4, %xmm1
; X86-SSE2-NEXT: pxor %xmm0, %xmm4
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm4
; X86-SSE2-NEXT: pand %xmm4, %xmm3
; X86-SSE2-NEXT: pandn %xmm0, %xmm4
; X86-SSE2-NEXT: por %xmm3, %xmm4
; X86-SSE2-NEXT: movd %xmm4, %eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v16i32:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminud %xmm3, %xmm1
; X86-SSE42-NEXT: pminud %xmm2, %xmm1
; X86-SSE42-NEXT: pminud %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE42-NEXT: pminud %xmm1, %xmm0
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE42-NEXT: pminud %xmm0, %xmm1
; X86-SSE42-NEXT: movd %xmm1, %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v16i32:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpminud %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v16i32:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v16i32:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; X64-SSE2-NEXT: movdqa %xmm1, %xmm5
; X64-SSE2-NEXT: pxor %xmm4, %xmm5
; X64-SSE2-NEXT: movdqa %xmm3, %xmm6
; X64-SSE2-NEXT: pxor %xmm4, %xmm6
; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm6
; X64-SSE2-NEXT: pand %xmm6, %xmm1
; X64-SSE2-NEXT: pandn %xmm3, %xmm6
; X64-SSE2-NEXT: por %xmm1, %xmm6
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: movdqa %xmm2, %xmm3
; X64-SSE2-NEXT: pxor %xmm4, %xmm3
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X64-SSE2-NEXT: pand %xmm3, %xmm0
; X64-SSE2-NEXT: pandn %xmm2, %xmm3
; X64-SSE2-NEXT: por %xmm0, %xmm3
; X64-SSE2-NEXT: movdqa %xmm3, %xmm0
; X64-SSE2-NEXT: pxor %xmm4, %xmm0
; X64-SSE2-NEXT: movdqa %xmm6, %xmm1
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm3
; X64-SSE2-NEXT: pandn %xmm6, %xmm1
; X64-SSE2-NEXT: por %xmm3, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pxor %xmm4, %xmm2
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm4, %xmm3
; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3
; X64-SSE2-NEXT: pand %xmm3, %xmm1
; X64-SSE2-NEXT: pandn %xmm0, %xmm3
; X64-SSE2-NEXT: por %xmm1, %xmm3
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
; X64-SSE2-NEXT: movdqa %xmm3, %xmm1
; X64-SSE2-NEXT: pxor %xmm4, %xmm1
; X64-SSE2-NEXT: pxor %xmm0, %xmm4
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm4
; X64-SSE2-NEXT: pand %xmm4, %xmm3
; X64-SSE2-NEXT: pandn %xmm0, %xmm4
; X64-SSE2-NEXT: por %xmm3, %xmm4
; X64-SSE2-NEXT: movd %xmm4, %eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v16i32:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminud %xmm3, %xmm1
; X64-SSE42-NEXT: pminud %xmm2, %xmm1
; X64-SSE42-NEXT: pminud %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE42-NEXT: pminud %xmm1, %xmm0
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE42-NEXT: pminud %xmm0, %xmm1
; X64-SSE42-NEXT: movd %xmm1, %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v16i32:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpminud %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v16i32:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v16i32:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <16 x i32> %a0, %1
  %3 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1
  %4 = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <16 x i32> %3, %4
  %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4
  %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <16 x i32> %6, %7
  %9 = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7
  %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <16 x i32> %9, %10
  %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10
  %13 = extractelement <16 x i32> %12, i32 0
  ret i32 %13
}
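
; SSE2 has no pminud, so the v16i32 lowering above emulates both the unsigned
; compare (sign-bit flip with 2^31 = 2147483648, then signed pcmpgtd) and the
; select: the pand/pandn/por triple is the bitwise form of a lane select.
; A minimal IR sketch of that identity (illustrative names only):
;
;   define <4 x i32> @bitselect_sketch(<4 x i32> %m, <4 x i32> %a, <4 x i32> %b) {
;     %nm = xor <4 x i32> %m, <i32 -1, i32 -1, i32 -1, i32 -1>
;     %t0 = and <4 x i32> %m, %a    ; pand
;     %t1 = and <4 x i32> %nm, %b   ; pandn
;     %r = or <4 x i32> %t0, %t1    ; por; equals select when each lane of %m is all-ones or all-zeros
;     ret <4 x i32> %r
;   }
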
define i16 @test_reduce_v32i16(<32 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i16:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa %xmm1, %xmm4
; X86-SSE2-NEXT: psubusw %xmm3, %xmm4
; X86-SSE2-NEXT: psubw %xmm4, %xmm1
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: psubusw %xmm2, %xmm3
; X86-SSE2-NEXT: psubw %xmm3, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: movd %xmm0, %eax
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v32i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminuw %xmm3, %xmm1
; X86-SSE42-NEXT: pminuw %xmm2, %xmm1
; X86-SSE42-NEXT: pminuw %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v32i16:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpminuw %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v32i16:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v32i16:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: movdqa %xmm1, %xmm4
; X64-SSE2-NEXT: psubusw %xmm3, %xmm4
; X64-SSE2-NEXT: psubw %xmm4, %xmm1
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: psubusw %xmm2, %xmm3
; X64-SSE2-NEXT: psubw %xmm3, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: movd %xmm0, %eax
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v32i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminuw %xmm3, %xmm1
; X64-SSE42-NEXT: pminuw %xmm2, %xmm1
; X64-SSE42-NEXT: pminuw %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v32i16:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpminuw %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v32i16:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v32i16:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminuw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <32 x i16> %a0, %1
  %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
  %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <32 x i16> %3, %4
  %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
  %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <32 x i16> %6, %7
  %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
  %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <32 x i16> %9, %10
  %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10
  %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp ult <32 x i16> %12, %13
  %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13
  %16 = extractelement <32 x i16> %15, i32 0
  ret i16 %16
}
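
; With SSE4.1+, phminposuw computes the unsigned minimum of all eight words of
; an xmm register in one instruction, so the v32i16 lowerings above fold the
; four 128-bit pieces together with pminuw and finish with a single
; phminposuw. A sketch of the same shape in IR (assuming the llvm.umin
; intrinsics of a recent LLVM; the function name is illustrative):
;
;   declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)
;   declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
;
;   define i16 @reduce_v32i16_sketch(<8 x i16> %q0, <8 x i16> %q1, <8 x i16> %q2, <8 x i16> %q3) {
;     %m01 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %q0, <8 x i16> %q1)
;     %m23 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %q2, <8 x i16> %q3)
;     %m = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %m01, <8 x i16> %m23)
;     %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %m)   ; one phminposuw
;     ret i16 %r
;   }
;
; On plain SSE2 there is no pminuw either; the psubusw/psubw pairs above
; compute min(a,b) = a - max(a - b, 0) using saturating word subtraction.
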
define i8 @test_reduce_v64i8(<64 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v64i8:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pminub %xmm3, %xmm1
; X86-SSE2-NEXT: pminub %xmm2, %xmm1
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: psrld $16, %xmm0
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrlw $8, %xmm1
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v64i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminub %xmm3, %xmm1
; X86-SSE42-NEXT: pminub %xmm2, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
; X86-SSE42-NEXT: psrlw $8, %xmm0
; X86-SSE42-NEXT: pminub %xmm1, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v64i8:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vpminub %xmm2, %xmm1, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v64i8:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v64i8:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pminub %xmm3, %xmm1
; X64-SSE2-NEXT: pminub %xmm2, %xmm1
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
; X64-SSE2-NEXT: psrld $16, %xmm0
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrlw $8, %xmm1
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v64i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminub %xmm3, %xmm1
; X64-SSE42-NEXT: pminub %xmm2, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
; X64-SSE42-NEXT: psrlw $8, %xmm0
; X64-SSE42-NEXT: pminub %xmm1, %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v64i8:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vpminub %xmm2, %xmm1, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v64i8:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v64i8:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X64-AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <64 x i8> %a0, %1
  %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <64 x i8> %3, %4
  %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <64 x i8> %6, %7
  %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %14 = icmp ult <64 x i8> %12, %13
  %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13
  %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %17 = icmp ult <64 x i8> %15, %16
  %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16
  %19 = extractelement <64 x i8> %18, i32 0
  ret i8 %19
}
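
; There is no byte-wide phminposuw, so the i8 reductions above finish with
; psrlw $8 + pminub before the word-wide phminposuw: the 16-bit shift moves
; each high byte into the low byte of its word and shifts zeroes into the high
; byte, so after the pminub every word holds min(lo, hi) zero-extended to 16
; bits, and the word minimum found by phminposuw is exactly the byte minimum.
; Sketched in IR (assuming the llvm.umin intrinsic of a recent LLVM; the
; function name is illustrative and not part of the checked tests):
;
;   declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)
;
;   define <16 x i8> @fold_bytes_sketch(<16 x i8> %v) {
;     %w = bitcast <16 x i8> %v to <8 x i16>
;     %sh = lshr <8 x i16> %w, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>  ; psrlw $8
;     %sb = bitcast <8 x i16> %sh to <16 x i8>
;     %r = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %v, <16 x i8> %sb)                  ; pminub
;     ret <16 x i8> %r
;   }
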
;
; Partial Vector Reductions
;

define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i16_v8i16:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: movd %xmm0, %eax
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v16i16_v8i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT: vzeroupper
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v16i16_v8i16:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: movd %xmm0, %eax
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v16i16_v8i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT: vzeroupper
; X64-AVX-NEXT: retq
  %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <16 x i16> %a0, %1
  %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
  %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <16 x i16> %3, %4
  %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
  %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <16 x i16> %6, %7
  %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
  %10 = extractelement <16 x i16> %9, i32 0
  ret i16 %10
}
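
; The partial reductions only read elements 0..7 of the wide input (every
; shuffle mask above pulls from the low 128-bit subvector), so the upper
; halves are dead and the SSE4.1+ lowerings collapse to a single phminposuw
; on %xmm0. An equivalent sketch using the reduction intrinsic on the live
; subvector (illustrative only; assuming a recent LLVM):
;
;   declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
;
;   define i16 @partial_umin_sketch(<16 x i16> %a0) {
;     %lo = shufflevector <16 x i16> %a0, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
;     %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %lo)
;     ret i16 %r
;   }
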
define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i16_v8i16:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
; X86-SSE2-NEXT: psubw %xmm2, %xmm0
; X86-SSE2-NEXT: movd %xmm0, %eax
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v32i16_v8i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT: vzeroupper
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v32i16_v8i16:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
; X64-SSE2-NEXT: psubw %xmm2, %xmm0
; X64-SSE2-NEXT: movd %xmm0, %eax
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v32i16_v8i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT: vzeroupper
; X64-AVX-NEXT: retq
  %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <32 x i16> %a0, %1
  %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
  %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <32 x i16> %3, %4
  %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
  %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <32 x i16> %6, %7
  %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
  %10 = extractelement <32 x i16> %9, i32 0
  ret i16 %10
}

define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i8_v16i8:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: psrlw $8, %xmm0
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
; X86-SSE2-NEXT: movd %xmm0, %eax
; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v32i8_v16i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: vzeroupper
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v32i8_v16i8:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
; X64-SSE2-NEXT: psrlw $8, %xmm0
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
; X64-SSE2-NEXT: movd %xmm0, %eax
; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v32i8_v16i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: vzeroupper
; X64-AVX-NEXT: retq
  %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <32 x i8> %a0, %1
  %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <32 x i8> %3, %4
  %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
  %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <32 x i8> %6, %7
  %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <32 x i8> %9, %10
  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
  %13 = extractelement <32 x i8> %12, i32 0
  ret i8 %13
}
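
; test_reduce_v64i8_v16i8 below lowers exactly like test_reduce_v32i8_v16i8
; above: only the low 16 bytes are live, so the wider source type changes
; nothing in the generated code. The live part, sketched with the reduction
; intrinsic (illustrative only; assuming a recent LLVM):
;
;   declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
;
;   define i8 @partial_umin_v64i8_sketch(<64 x i8> %a0) {
;     %lo = shufflevector <64 x i8> %a0, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
;     %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %lo)
;     ret i8 %r
;   }
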
define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pminub %xmm0, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: psrlw $8, %xmm0
; X86-SSE2-NEXT: pminub %xmm1, %xmm0
; X86-SSE2-NEXT: movd %xmm0, %eax
; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: vzeroupper
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pminub %xmm0, %xmm1
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
; X64-SSE2-NEXT: psrlw $8, %xmm0
; X64-SSE2-NEXT: pminub %xmm1, %xmm0
; X64-SSE2-NEXT: movd %xmm0, %eax
; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: vzeroupper
; X64-AVX-NEXT: retq
  %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp ult <64 x i8> %a0, %1
  %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp ult <64 x i8> %3, %4
  %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp ult <64 x i8> %6, %7
  %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp ult <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  %13 = extractelement <64 x i8> %12, i32 0
  ret i8 %13
}