; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X86-SSE42
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64-SSE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE42
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512

;
; 128-bit Vectors
;

define i64 @test_reduce_v2i64(<2 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v2i64:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
; X86-SSE2-NEXT: pxor %xmm1, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm5, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT: por %xmm2, %xmm3
; X86-SSE2-NEXT: pand %xmm3, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm3
; X86-SSE2-NEXT: por %xmm0, %xmm3
; X86-SSE2-NEXT: movd %xmm3, %eax
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
; X86-SSE2-NEXT: movd %xmm0, %edx
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v2i64:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT: movd %xmm2, %eax
; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v2i64:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v2i64:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
; X64-SSE2-NEXT: pxor %xmm1, %xmm2
; X64-SSE2-NEXT: movdqa %xmm2, %xmm4
; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm5, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT: por %xmm2, %xmm3
; X64-SSE2-NEXT: pand %xmm3, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm3
; X64-SSE2-NEXT: por %xmm0, %xmm3
; X64-SSE2-NEXT: movq %xmm3, %rax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v2i64:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
; X64-SSE42-NEXT: movdqa %xmm2, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT: movq %xmm2, %rax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v2i64:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v2i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v2i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: retq
  %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %2 = icmp slt <2 x i64> %a0, %1
  %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1
  %4 = extractelement <2 x i64> %3, i32 0
  ret i64 %4
}

define i32 @test_reduce_v4i32(<4 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i32:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
; X86-SSE2-NEXT: por %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm2
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v4i32:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE42-NEXT: pminsd %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v4i32:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v4i32:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm2
; X64-SSE2-NEXT: por %xmm0, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pandn %xmm0, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v4i32:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE42-NEXT: pminsd %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v4i32:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp slt <4 x i32> %a0, %1
  %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <4 x i32> %3, %4
  %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4
  %7 = extractelement <4 x i32> %6, i32 0
  ret i32 %7
}

define i16 @test_reduce_v8i16(<8 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i16:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v8i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pxor LCPI2_0, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v8i16:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpxor LCPI2_0, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v8i16:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v8i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v8i16:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX-NEXT: retq
  %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp slt <8 x i16> %a0, %1
  %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <8 x i16> %3, %4
  %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4
  %7 = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <8 x i16> %6, %7
  %9 = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7
  %10 = extractelement <8 x i16> %9, i32 0
  ret i16 %10
}

define i8 @test_reduce_v16i8(<16 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i8:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
; X86-SSE2-NEXT: por %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm2
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm2, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: psrld $16, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm1
; X86-SSE2-NEXT: pandn %xmm0, %xmm2
; X86-SSE2-NEXT: por %xmm1, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
; X86-SSE2-NEXT: psrlw $8, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm2
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm2, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v16i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pxor LCPI3_0, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorb $-128, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX-LABEL: test_reduce_v16i8:
; X86-AVX: ## %bb.0:
; X86-AVX-NEXT: vpxor LCPI3_0, %xmm0, %xmm0
; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, %eax
; X86-AVX-NEXT: xorb $-128, %al
; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v16i8:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm2
; X64-SSE2-NEXT: por %xmm0, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pandn %xmm0, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm1
; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
; X64-SSE2-NEXT: psrld $16, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm1
; X64-SSE2-NEXT: pandn %xmm0, %xmm2
; X64-SSE2-NEXT: por %xmm1, %xmm2
; X64-SSE2-NEXT: movdqa %xmm2, %xmm0
; X64-SSE2-NEXT: psrlw $8, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pandn %xmm0, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v16i8:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
; X64-SSE42-NEXT: psrlw $8, %xmm1
; X64-SSE42-NEXT: pminub %xmm0, %xmm1
; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorb $-128, %al
; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX-LABEL: test_reduce_v16i8:
; X64-AVX: ## %bb.0:
; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: xorb $-128, %al
; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT: retq
  %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp slt <16 x i8> %a0, %1
  %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <16 x i8> %3, %4
  %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4
  %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <16 x i8> %6, %7
  %9 = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7
  %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <16 x i8> %9, %10
  %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10
  %13 = extractelement <16 x i8> %12, i32 0
  ret i8 %13
}

;
; 256-bit Vectors
;

define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-SSE2-LABEL: test_reduce_v4i64:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
; X86-SSE2-NEXT: pxor %xmm2, %xmm3
; X86-SSE2-NEXT: movdqa %xmm1, %xmm4
; X86-SSE2-NEXT: pxor %xmm2, %xmm4
; X86-SSE2-NEXT: movdqa %xmm4, %xmm5
; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm6, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X86-SSE2-NEXT: por %xmm3, %xmm4
; X86-SSE2-NEXT: pand %xmm4, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm4
; X86-SSE2-NEXT: por %xmm0, %xmm4
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
; X86-SSE2-NEXT: pxor %xmm2, %xmm1
; X86-SSE2-NEXT: pxor %xmm0, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X86-SSE2-NEXT: pand %xmm5, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X86-SSE2-NEXT: por %xmm1, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm4
; X86-SSE2-NEXT: pandn %xmm0, %xmm2
; X86-SSE2-NEXT: por %xmm4, %xmm2
; X86-SSE2-NEXT: movd %xmm2, %eax
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT: movd %xmm0, %edx
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v4i64:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X86-SSE42-NEXT: movd %xmm2, %eax
; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v4i64:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v4i64:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v4i64:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
; X64-SSE2-NEXT: pxor %xmm2, %xmm3
; X64-SSE2-NEXT: movdqa %xmm1, %xmm4
; X64-SSE2-NEXT: pxor %xmm2, %xmm4
; X64-SSE2-NEXT: movdqa %xmm4, %xmm5
; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm6, %xmm3
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; X64-SSE2-NEXT: por %xmm3, %xmm4
; X64-SSE2-NEXT: pand %xmm4, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm4
; X64-SSE2-NEXT: por %xmm0, %xmm4
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
; X64-SSE2-NEXT: pxor %xmm2, %xmm1
; X64-SSE2-NEXT: pxor %xmm0, %xmm2
; X64-SSE2-NEXT: movdqa %xmm2, %xmm3
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE2-NEXT: pand %xmm5, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
; X64-SSE2-NEXT: por %xmm1, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm4
; X64-SSE2-NEXT: pandn %xmm0, %xmm2
; X64-SSE2-NEXT: por %xmm4, %xmm2
; X64-SSE2-NEXT: movq %xmm2, %rax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v4i64:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; X64-SSE42-NEXT: movdqa %xmm2, %xmm0
; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; X64-SSE42-NEXT: movq %xmm2, %rax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v4i64:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v4i64:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT: vmovq %xmm0, %rax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v4i64:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovq %xmm0, %rax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = icmp slt <4 x i64> %a0, %1
  %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1
  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <4 x i64> %3, %4
  %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4
  %7 = extractelement <4 x i64> %6, i32 0
  ret i64 %7
}

define i32 @test_reduce_v8i32(<8 x i32> %a0) {
; X86-SSE2-LABEL: test_reduce_v8i32:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
; X86-SSE2-NEXT: por %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm2
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm2, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm1
; X86-SSE2-NEXT: pandn %xmm0, %xmm2
; X86-SSE2-NEXT: por %xmm1, %xmm2
; X86-SSE2-NEXT: movd %xmm2, %eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v8i32:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE42-NEXT: pminsd %xmm0, %xmm1
; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v8i32:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v8i32:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v8i32:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm0
; X64-SSE2-NEXT: pandn %xmm1, %xmm2
; X64-SSE2-NEXT: por %xmm0, %xmm2
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; X64-SSE2-NEXT: pand %xmm1, %xmm2
; X64-SSE2-NEXT: pandn %xmm0, %xmm1
; X64-SSE2-NEXT: por %xmm2, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X64-SSE2-NEXT: pand %xmm2, %xmm1
; X64-SSE2-NEXT: pandn %xmm0, %xmm2
; X64-SSE2-NEXT: por %xmm1, %xmm2
; X64-SSE2-NEXT: movd %xmm2, %eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v8i32:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE42-NEXT: pminsd %xmm0, %xmm1
; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v8i32:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v8i32:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v8i32:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp slt <8 x i32> %a0, %1
  %3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1
  %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <8 x i32> %3, %4
  %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4
  %7 = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <8 x i32> %6, %7
  %9 = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7
  %10 = extractelement <8 x i32> %9, i32 0
  ret i32 %10
}

define i16 @test_reduce_v16i16(<16 x i16> %a0) {
; X86-SSE2-LABEL: test_reduce_v16i16:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: psrld $16, %xmm1
; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
; X86-SSE2-NEXT: movd %xmm1, %eax
; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v16i16:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminsw %xmm1, %xmm0
; X86-SSE42-NEXT: pxor LCPI6_0, %xmm0
; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v16i16:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor LCPI6_0, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v16i16:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT: vpxor LCPI6_0, %xmm0, %xmm0
; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX2-NEXT: vmovd %xmm0, %eax
; X86-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-AVX2-NEXT: vzeroupper
; X86-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: test_reduce_v16i16:
; X64-SSE2: ## %bb.0:
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
; X64-SSE2-NEXT: psrld $16, %xmm1
; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
; X64-SSE2-NEXT: movd %xmm1, %eax
; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE2-NEXT: retq
;
; X64-SSE42-LABEL: test_reduce_v16i16:
; X64-SSE42: ## %bb.0:
; X64-SSE42-NEXT: pminsw %xmm1, %xmm0
; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
; X64-SSE42-NEXT: movd %xmm0, %eax
; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-SSE42-NEXT: retq
;
; X64-AVX1-LABEL: test_reduce_v16i16:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX1-NEXT: vmovd %xmm0, %eax
; X64-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX1-NEXT: vzeroupper
; X64-AVX1-NEXT: retq
;
; X64-AVX2-LABEL: test_reduce_v16i16:
; X64-AVX2: ## %bb.0:
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX2-NEXT: vmovd %xmm0, %eax
; X64-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX2-NEXT: vzeroupper
; X64-AVX2-NEXT: retq
;
; X64-AVX512-LABEL: test_reduce_v16i16:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
; X64-AVX512-NEXT: vmovd %xmm0, %eax
; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-AVX512-NEXT: vzeroupper
; X64-AVX512-NEXT: retq
  %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp slt <16 x i16> %a0, %1
  %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
  %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <16 x i16> %3, %4
  %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
  %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <16 x i16> %6, %7
  %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
  %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <16 x i16> %9, %10
  %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10
  %13 = extractelement <16 x i16> %12, i32 0
  ret i16 %13
}

define i8 @test_reduce_v32i8(<32 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v32i8:
; X86-SSE2: ## %bb.0:
; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm0
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
; X86-SSE2-NEXT: por %xmm0, %xmm2
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm2
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm2, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm1
; X86-SSE2-NEXT: pandn %xmm0, %xmm2
; X86-SSE2-NEXT: por %xmm1, %xmm2
; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
; X86-SSE2-NEXT: psrld $16, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT: pand %xmm1, %xmm2
; X86-SSE2-NEXT: pandn %xmm0, %xmm1
; X86-SSE2-NEXT: por %xmm2, %xmm1
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
; X86-SSE2-NEXT: psrlw $8, %xmm0
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT: pand %xmm2, %xmm1
; X86-SSE2-NEXT: pandn %xmm0, %xmm2
; X86-SSE2-NEXT: por %xmm1, %xmm2
; X86-SSE2-NEXT: movd %xmm2, %eax
; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT: retl
;
; X86-SSE42-LABEL: test_reduce_v32i8:
; X86-SSE42: ## %bb.0:
; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
; X86-SSE42-NEXT: pxor LCPI7_0, %xmm0
; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
; X86-SSE42-NEXT: psrlw $8, %xmm1
; X86-SSE42-NEXT: pminub %xmm0, %xmm1
; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT: movd %xmm0, %eax
; X86-SSE42-NEXT: xorb $-128, %al
; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT: retl
;
; X86-AVX1-LABEL: test_reduce_v32i8:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vpxor LCPI7_0, %xmm0, %xmm0
; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: xorb $-128, %al
; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
; X86-AVX1-NEXT: vzeroupper
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: test_reduce_v32i8:
; X86-AVX2: ## %bb.0:
; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
905; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 906; X86-AVX2-NEXT: vpxor LCPI7_0, %xmm0, %xmm0 907; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 908; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 909; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 910; X86-AVX2-NEXT: vmovd %xmm0, %eax 911; X86-AVX2-NEXT: xorb $-128, %al 912; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 913; X86-AVX2-NEXT: vzeroupper 914; X86-AVX2-NEXT: retl 915; 916; X64-SSE2-LABEL: test_reduce_v32i8: 917; X64-SSE2: ## %bb.0: 918; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 919; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 920; X64-SSE2-NEXT: pand %xmm2, %xmm0 921; X64-SSE2-NEXT: pandn %xmm1, %xmm2 922; X64-SSE2-NEXT: por %xmm0, %xmm2 923; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] 924; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 925; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 926; X64-SSE2-NEXT: pand %xmm1, %xmm2 927; X64-SSE2-NEXT: pandn %xmm0, %xmm1 928; X64-SSE2-NEXT: por %xmm2, %xmm1 929; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 930; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 931; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 932; X64-SSE2-NEXT: pand %xmm2, %xmm1 933; X64-SSE2-NEXT: pandn %xmm0, %xmm2 934; X64-SSE2-NEXT: por %xmm1, %xmm2 935; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 936; X64-SSE2-NEXT: psrld $16, %xmm0 937; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 938; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 939; X64-SSE2-NEXT: pand %xmm1, %xmm2 940; X64-SSE2-NEXT: pandn %xmm0, %xmm1 941; X64-SSE2-NEXT: por %xmm2, %xmm1 942; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 943; X64-SSE2-NEXT: psrlw $8, %xmm0 944; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 945; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 946; X64-SSE2-NEXT: pand %xmm2, %xmm1 947; X64-SSE2-NEXT: pandn %xmm0, %xmm2 948; X64-SSE2-NEXT: por %xmm1, %xmm2 949; X64-SSE2-NEXT: movd %xmm2, %eax 950; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 951; X64-SSE2-NEXT: retq 952; 953; X64-SSE42-LABEL: test_reduce_v32i8: 954; X64-SSE42: ## %bb.0: 955; X64-SSE42-NEXT: pminsb %xmm1, %xmm0 956; X64-SSE42-NEXT: pxor 
{{.*}}(%rip), %xmm0 957; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 958; X64-SSE42-NEXT: psrlw $8, %xmm1 959; X64-SSE42-NEXT: pminub %xmm0, %xmm1 960; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 961; X64-SSE42-NEXT: movd %xmm0, %eax 962; X64-SSE42-NEXT: xorb $-128, %al 963; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 964; X64-SSE42-NEXT: retq 965; 966; X64-AVX1-LABEL: test_reduce_v32i8: 967; X64-AVX1: ## %bb.0: 968; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 969; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 970; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 971; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 972; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 973; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 974; X64-AVX1-NEXT: vmovd %xmm0, %eax 975; X64-AVX1-NEXT: xorb $-128, %al 976; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 977; X64-AVX1-NEXT: vzeroupper 978; X64-AVX1-NEXT: retq 979; 980; X64-AVX2-LABEL: test_reduce_v32i8: 981; X64-AVX2: ## %bb.0: 982; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 983; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 984; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 985; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 986; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 987; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 988; X64-AVX2-NEXT: vmovd %xmm0, %eax 989; X64-AVX2-NEXT: xorb $-128, %al 990; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 991; X64-AVX2-NEXT: vzeroupper 992; X64-AVX2-NEXT: retq 993; 994; X64-AVX512-LABEL: test_reduce_v32i8: 995; X64-AVX512: ## %bb.0: 996; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 997; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 998; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 999; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 1000; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 1001; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1002; X64-AVX512-NEXT: vmovd %xmm0, %eax 1003; X64-AVX512-NEXT: xorb $-128, %al 1004; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 1005; X64-AVX512-NEXT: vzeroupper 
1006; X64-AVX512-NEXT: retq 1007 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1008 %2 = icmp slt <32 x i8> %a0, %1 1009 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1 1010 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1011 %5 = icmp slt <32 x i8> %3, %4 1012 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 1013 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1014 %8 = icmp slt <32 x i8> %6, %7 1015 %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7 1016 %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1017 %11 = icmp slt <32 x i8> %9, %10 1018 %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10 1019 %13 
= shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1020 %14 = icmp slt <32 x i8> %12, %13 1021 %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13 1022 %16 = extractelement <32 x i8> %15, i32 0 1023 ret i8 %16 1024} 1025 1026; 1027; 512-bit Vectors 1028; 1029 1030define i64 @test_reduce_v8i64(<8 x i64> %a0) { 1031; X86-SSE2-LABEL: test_reduce_v8i64: 1032; X86-SSE2: ## %bb.0: 1033; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0] 1034; X86-SSE2-NEXT: movdqa %xmm1, %xmm5 1035; X86-SSE2-NEXT: pxor %xmm4, %xmm5 1036; X86-SSE2-NEXT: movdqa %xmm3, %xmm6 1037; X86-SSE2-NEXT: pxor %xmm4, %xmm6 1038; X86-SSE2-NEXT: movdqa %xmm6, %xmm7 1039; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1040; X86-SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1041; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2] 1042; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1043; X86-SSE2-NEXT: pand %xmm5, %xmm6 1044; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1045; X86-SSE2-NEXT: por %xmm6, %xmm5 1046; X86-SSE2-NEXT: pand %xmm5, %xmm1 1047; X86-SSE2-NEXT: pandn %xmm3, %xmm5 1048; X86-SSE2-NEXT: por %xmm1, %xmm5 1049; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1050; X86-SSE2-NEXT: pxor %xmm4, %xmm1 1051; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 1052; X86-SSE2-NEXT: pxor %xmm4, %xmm3 1053; X86-SSE2-NEXT: movdqa %xmm3, %xmm6 1054; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm6 1055; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm3 1056; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,0,2,2] 1057; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1058; X86-SSE2-NEXT: pand %xmm1, %xmm3 1059; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3] 1060; X86-SSE2-NEXT: por %xmm3, 
%xmm1 1061; X86-SSE2-NEXT: pand %xmm1, %xmm0 1062; X86-SSE2-NEXT: pandn %xmm2, %xmm1 1063; X86-SSE2-NEXT: por %xmm0, %xmm1 1064; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1065; X86-SSE2-NEXT: pxor %xmm4, %xmm0 1066; X86-SSE2-NEXT: movdqa %xmm5, %xmm2 1067; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1068; X86-SSE2-NEXT: movdqa %xmm2, %xmm3 1069; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1070; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm2 1071; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 1072; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1073; X86-SSE2-NEXT: pand %xmm0, %xmm2 1074; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3] 1075; X86-SSE2-NEXT: por %xmm2, %xmm0 1076; X86-SSE2-NEXT: pand %xmm0, %xmm1 1077; X86-SSE2-NEXT: pandn %xmm5, %xmm0 1078; X86-SSE2-NEXT: por %xmm1, %xmm0 1079; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1080; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1081; X86-SSE2-NEXT: pxor %xmm4, %xmm2 1082; X86-SSE2-NEXT: pxor %xmm1, %xmm4 1083; X86-SSE2-NEXT: movdqa %xmm4, %xmm3 1084; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 1085; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1086; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2] 1087; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 1088; X86-SSE2-NEXT: pand %xmm2, %xmm4 1089; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 1090; X86-SSE2-NEXT: por %xmm4, %xmm2 1091; X86-SSE2-NEXT: pand %xmm2, %xmm0 1092; X86-SSE2-NEXT: pandn %xmm1, %xmm2 1093; X86-SSE2-NEXT: por %xmm0, %xmm2 1094; X86-SSE2-NEXT: movd %xmm2, %eax 1095; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1096; X86-SSE2-NEXT: movd %xmm0, %edx 1097; X86-SSE2-NEXT: retl 1098; 1099; X86-SSE42-LABEL: test_reduce_v8i64: 1100; X86-SSE42: ## %bb.0: 1101; X86-SSE42-NEXT: movdqa %xmm0, %xmm4 1102; X86-SSE42-NEXT: movdqa %xmm2, %xmm0 1103; X86-SSE42-NEXT: pcmpgtq %xmm4, %xmm0 1104; X86-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1105; X86-SSE42-NEXT: movdqa %xmm3, %xmm0 1106; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1107; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 
1108; X86-SSE42-NEXT: movapd %xmm3, %xmm0 1109; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 1110; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1111; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3] 1112; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 1113; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1114; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 1115; X86-SSE42-NEXT: movd %xmm1, %eax 1116; X86-SSE42-NEXT: pextrd $1, %xmm1, %edx 1117; X86-SSE42-NEXT: retl 1118; 1119; X86-AVX1-LABEL: test_reduce_v8i64: 1120; X86-AVX1: ## %bb.0: 1121; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1122; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1123; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm4 1124; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm5 1125; X86-AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm1, %xmm0 1126; X86-AVX1-NEXT: vblendvpd %xmm4, %xmm2, %xmm3, %xmm1 1127; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1128; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1129; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1130; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1131; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1132; X86-AVX1-NEXT: vmovd %xmm0, %eax 1133; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx 1134; X86-AVX1-NEXT: vzeroupper 1135; X86-AVX1-NEXT: retl 1136; 1137; X86-AVX2-LABEL: test_reduce_v8i64: 1138; X86-AVX2: ## %bb.0: 1139; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 1140; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1141; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1142; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1143; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1144; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1145; X86-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1146; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1147; X86-AVX2-NEXT: vmovd %xmm0, %eax 1148; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx 1149; X86-AVX2-NEXT: vzeroupper 1150; X86-AVX2-NEXT: retl 1151; 1152; X64-SSE2-LABEL: test_reduce_v8i64: 1153; X64-SSE2: ## %bb.0: 1154; X64-SSE2-NEXT: 
movdqa {{.*#+}} xmm4 = [2147483648,2147483648] 1155; X64-SSE2-NEXT: movdqa %xmm1, %xmm5 1156; X64-SSE2-NEXT: pxor %xmm4, %xmm5 1157; X64-SSE2-NEXT: movdqa %xmm3, %xmm6 1158; X64-SSE2-NEXT: pxor %xmm4, %xmm6 1159; X64-SSE2-NEXT: movdqa %xmm6, %xmm7 1160; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1161; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] 1162; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1163; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1164; X64-SSE2-NEXT: pand %xmm8, %xmm6 1165; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1166; X64-SSE2-NEXT: por %xmm6, %xmm5 1167; X64-SSE2-NEXT: pand %xmm5, %xmm1 1168; X64-SSE2-NEXT: pandn %xmm3, %xmm5 1169; X64-SSE2-NEXT: por %xmm1, %xmm5 1170; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1171; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1172; X64-SSE2-NEXT: movdqa %xmm2, %xmm3 1173; X64-SSE2-NEXT: pxor %xmm4, %xmm3 1174; X64-SSE2-NEXT: movdqa %xmm3, %xmm6 1175; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm6 1176; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] 1177; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm3 1178; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 1179; X64-SSE2-NEXT: pand %xmm7, %xmm1 1180; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] 1181; X64-SSE2-NEXT: por %xmm1, %xmm3 1182; X64-SSE2-NEXT: pand %xmm3, %xmm0 1183; X64-SSE2-NEXT: pandn %xmm2, %xmm3 1184; X64-SSE2-NEXT: por %xmm0, %xmm3 1185; X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1186; X64-SSE2-NEXT: pxor %xmm4, %xmm0 1187; X64-SSE2-NEXT: movdqa %xmm5, %xmm1 1188; X64-SSE2-NEXT: pxor %xmm4, %xmm1 1189; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1190; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 1191; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2] 1192; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1 1193; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] 1194; X64-SSE2-NEXT: pand %xmm6, %xmm0 1195; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 1196; X64-SSE2-NEXT: por %xmm0, %xmm1 1197; X64-SSE2-NEXT: pand %xmm1, %xmm3 1198; X64-SSE2-NEXT: pandn %xmm5, %xmm1 1199; X64-SSE2-NEXT: por %xmm3, 
%xmm1 1200; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1201; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 1202; X64-SSE2-NEXT: pxor %xmm4, %xmm2 1203; X64-SSE2-NEXT: pxor %xmm0, %xmm4 1204; X64-SSE2-NEXT: movdqa %xmm4, %xmm3 1205; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3 1206; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2] 1207; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1208; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 1209; X64-SSE2-NEXT: pand %xmm5, %xmm2 1210; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1211; X64-SSE2-NEXT: por %xmm2, %xmm3 1212; X64-SSE2-NEXT: pand %xmm3, %xmm1 1213; X64-SSE2-NEXT: pandn %xmm0, %xmm3 1214; X64-SSE2-NEXT: por %xmm1, %xmm3 1215; X64-SSE2-NEXT: movq %xmm3, %rax 1216; X64-SSE2-NEXT: retq 1217; 1218; X64-SSE42-LABEL: test_reduce_v8i64: 1219; X64-SSE42: ## %bb.0: 1220; X64-SSE42-NEXT: movdqa %xmm0, %xmm4 1221; X64-SSE42-NEXT: movdqa %xmm2, %xmm0 1222; X64-SSE42-NEXT: pcmpgtq %xmm4, %xmm0 1223; X64-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1224; X64-SSE42-NEXT: movdqa %xmm3, %xmm0 1225; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1226; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1227; X64-SSE42-NEXT: movapd %xmm3, %xmm0 1228; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 1229; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3 1230; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3] 1231; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 1232; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0 1233; X64-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1 1234; X64-SSE42-NEXT: movq %xmm1, %rax 1235; X64-SSE42-NEXT: retq 1236; 1237; X64-AVX1-LABEL: test_reduce_v8i64: 1238; X64-AVX1: ## %bb.0: 1239; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1240; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1241; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm4 1242; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm5 1243; X64-AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm1, %xmm0 1244; X64-AVX1-NEXT: vblendvpd %xmm4, %xmm2, %xmm3, %xmm1 1245; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1246; X64-AVX1-NEXT: vblendvpd 
%xmm2, %xmm0, %xmm1, %xmm0 1247; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1248; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1249; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1250; X64-AVX1-NEXT: vmovq %xmm0, %rax 1251; X64-AVX1-NEXT: vzeroupper 1252; X64-AVX1-NEXT: retq 1253; 1254; X64-AVX2-LABEL: test_reduce_v8i64: 1255; X64-AVX2: ## %bb.0: 1256; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 1257; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1258; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 1259; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1260; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1261; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,2,3] 1262; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1263; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1264; X64-AVX2-NEXT: vmovq %xmm0, %rax 1265; X64-AVX2-NEXT: vzeroupper 1266; X64-AVX2-NEXT: retq 1267; 1268; X64-AVX512-LABEL: test_reduce_v8i64: 1269; X64-AVX512: ## %bb.0: 1270; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1271; X64-AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0 1272; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1273; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0 1274; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1275; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0 1276; X64-AVX512-NEXT: vmovq %xmm0, %rax 1277; X64-AVX512-NEXT: vzeroupper 1278; X64-AVX512-NEXT: retq 1279 %1 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> 1280 %2 = icmp slt <8 x i64> %a0, %1 1281 %3 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1 1282 %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1283 %5 = icmp slt <8 x i64> %3, %4 1284 %6 = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4 1285 %7 = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef> 1286 %8 = icmp slt <8 x i64> %6, %7 1287 %9 = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7 1288 %10 = extractelement <8 x i64> %9, i32 0 1289 ret i64 %10 1290} 1291 1292define i32 @test_reduce_v16i32(<16 x i32> %a0) { 1293; X86-SSE2-LABEL: test_reduce_v16i32: 1294; X86-SSE2: ## %bb.0: 1295; X86-SSE2-NEXT: movdqa %xmm2, %xmm4 1296; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1297; X86-SSE2-NEXT: pand %xmm4, %xmm0 1298; X86-SSE2-NEXT: pandn %xmm2, %xmm4 1299; X86-SSE2-NEXT: por %xmm0, %xmm4 1300; X86-SSE2-NEXT: movdqa %xmm3, %xmm0 1301; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm0 1302; X86-SSE2-NEXT: pand %xmm0, %xmm1 1303; X86-SSE2-NEXT: pandn %xmm3, %xmm0 1304; X86-SSE2-NEXT: por %xmm1, %xmm0 1305; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1306; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm1 1307; X86-SSE2-NEXT: pand %xmm1, %xmm4 1308; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1309; X86-SSE2-NEXT: por %xmm4, %xmm1 1310; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1311; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1312; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1313; X86-SSE2-NEXT: pand %xmm2, %xmm1 1314; X86-SSE2-NEXT: pandn %xmm0, %xmm2 1315; X86-SSE2-NEXT: por %xmm1, %xmm2 1316; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1317; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1318; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 1319; X86-SSE2-NEXT: pand %xmm1, %xmm2 1320; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1321; X86-SSE2-NEXT: por %xmm2, %xmm1 1322; X86-SSE2-NEXT: movd %xmm1, %eax 1323; X86-SSE2-NEXT: retl 1324; 1325; X86-SSE42-LABEL: test_reduce_v16i32: 1326; X86-SSE42: ## %bb.0: 1327; X86-SSE42-NEXT: pminsd %xmm3, %xmm1 1328; X86-SSE42-NEXT: pminsd %xmm2, %xmm1 1329; X86-SSE42-NEXT: pminsd %xmm0, %xmm1 1330; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1331; X86-SSE42-NEXT: pminsd %xmm1, %xmm0 1332; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1333; X86-SSE42-NEXT: pminsd %xmm0, %xmm1 1334; X86-SSE42-NEXT: movd %xmm1, %eax 1335; X86-SSE42-NEXT: retl 1336; 1337; X86-AVX1-LABEL: 
test_reduce_v16i32: 1338; X86-AVX1: ## %bb.0: 1339; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1340; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1341; X86-AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2 1342; X86-AVX1-NEXT: vpminsd %xmm2, %xmm1, %xmm1 1343; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1344; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1345; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1346; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1347; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1348; X86-AVX1-NEXT: vmovd %xmm0, %eax 1349; X86-AVX1-NEXT: vzeroupper 1350; X86-AVX1-NEXT: retl 1351; 1352; X86-AVX2-LABEL: test_reduce_v16i32: 1353; X86-AVX2: ## %bb.0: 1354; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0 1355; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1356; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1357; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1358; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1359; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1360; X86-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1361; X86-AVX2-NEXT: vmovd %xmm0, %eax 1362; X86-AVX2-NEXT: vzeroupper 1363; X86-AVX2-NEXT: retl 1364; 1365; X64-SSE2-LABEL: test_reduce_v16i32: 1366; X64-SSE2: ## %bb.0: 1367; X64-SSE2-NEXT: movdqa %xmm2, %xmm4 1368; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1369; X64-SSE2-NEXT: pand %xmm4, %xmm0 1370; X64-SSE2-NEXT: pandn %xmm2, %xmm4 1371; X64-SSE2-NEXT: por %xmm0, %xmm4 1372; X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1373; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm0 1374; X64-SSE2-NEXT: pand %xmm0, %xmm1 1375; X64-SSE2-NEXT: pandn %xmm3, %xmm0 1376; X64-SSE2-NEXT: por %xmm1, %xmm0 1377; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1378; X64-SSE2-NEXT: pcmpgtd %xmm4, %xmm1 1379; X64-SSE2-NEXT: pand %xmm1, %xmm4 1380; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1381; X64-SSE2-NEXT: por %xmm4, %xmm1 1382; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1383; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1384; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1385; X64-SSE2-NEXT: pand %xmm2, %xmm1 
1386; X64-SSE2-NEXT: pandn %xmm0, %xmm2 1387; X64-SSE2-NEXT: por %xmm1, %xmm2 1388; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1389; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1390; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1 1391; X64-SSE2-NEXT: pand %xmm1, %xmm2 1392; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1393; X64-SSE2-NEXT: por %xmm2, %xmm1 1394; X64-SSE2-NEXT: movd %xmm1, %eax 1395; X64-SSE2-NEXT: retq 1396; 1397; X64-SSE42-LABEL: test_reduce_v16i32: 1398; X64-SSE42: ## %bb.0: 1399; X64-SSE42-NEXT: pminsd %xmm3, %xmm1 1400; X64-SSE42-NEXT: pminsd %xmm2, %xmm1 1401; X64-SSE42-NEXT: pminsd %xmm0, %xmm1 1402; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1403; X64-SSE42-NEXT: pminsd %xmm1, %xmm0 1404; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1405; X64-SSE42-NEXT: pminsd %xmm0, %xmm1 1406; X64-SSE42-NEXT: movd %xmm1, %eax 1407; X64-SSE42-NEXT: retq 1408; 1409; X64-AVX1-LABEL: test_reduce_v16i32: 1410; X64-AVX1: ## %bb.0: 1411; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1412; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1413; X64-AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2 1414; X64-AVX1-NEXT: vpminsd %xmm2, %xmm1, %xmm1 1415; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1416; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1417; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1418; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1419; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1420; X64-AVX1-NEXT: vmovd %xmm0, %eax 1421; X64-AVX1-NEXT: vzeroupper 1422; X64-AVX1-NEXT: retq 1423; 1424; X64-AVX2-LABEL: test_reduce_v16i32: 1425; X64-AVX2: ## %bb.0: 1426; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0 1427; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1428; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1429; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1430; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1431; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1432; X64-AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1433; X64-AVX2-NEXT: vmovd %xmm0, %eax 1434; 
X64-AVX2-NEXT: vzeroupper 1435; X64-AVX2-NEXT: retq 1436; 1437; X64-AVX512-LABEL: test_reduce_v16i32: 1438; X64-AVX512: ## %bb.0: 1439; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1440; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0 1441; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1442; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1443; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1444; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1445; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1446; X64-AVX512-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1447; X64-AVX512-NEXT: vmovd %xmm0, %eax 1448; X64-AVX512-NEXT: vzeroupper 1449; X64-AVX512-NEXT: retq 1450 %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1451 %2 = icmp slt <16 x i32> %a0, %1 1452 %3 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1 1453 %4 = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1454 %5 = icmp slt <16 x i32> %3, %4 1455 %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4 1456 %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1457 %8 = icmp slt <16 x i32> %6, %7 1458 %9 = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7 1459 %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1460 %11 = icmp slt <16 x i32> %9, %10 1461 %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10 1462 %13 = 
extractelement <16 x i32> %12, i32 0 1463 ret i32 %13 1464} 1465 1466define i16 @test_reduce_v32i16(<32 x i16> %a0) { 1467; X86-SSE2-LABEL: test_reduce_v32i16: 1468; X86-SSE2: ## %bb.0: 1469; X86-SSE2-NEXT: pminsw %xmm3, %xmm1 1470; X86-SSE2-NEXT: pminsw %xmm2, %xmm1 1471; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 1472; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1473; X86-SSE2-NEXT: pminsw %xmm1, %xmm0 1474; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1475; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 1476; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1477; X86-SSE2-NEXT: psrld $16, %xmm0 1478; X86-SSE2-NEXT: pminsw %xmm1, %xmm0 1479; X86-SSE2-NEXT: movd %xmm0, %eax 1480; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1481; X86-SSE2-NEXT: retl 1482; 1483; X86-SSE42-LABEL: test_reduce_v32i16: 1484; X86-SSE42: ## %bb.0: 1485; X86-SSE42-NEXT: pminsw %xmm3, %xmm1 1486; X86-SSE42-NEXT: pminsw %xmm2, %xmm1 1487; X86-SSE42-NEXT: pminsw %xmm0, %xmm1 1488; X86-SSE42-NEXT: pxor LCPI10_0, %xmm1 1489; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 1490; X86-SSE42-NEXT: movd %xmm0, %eax 1491; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 1492; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1493; X86-SSE42-NEXT: retl 1494; 1495; X86-AVX1-LABEL: test_reduce_v32i16: 1496; X86-AVX1: ## %bb.0: 1497; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1498; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1499; X86-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2 1500; X86-AVX1-NEXT: vpminsw %xmm2, %xmm1, %xmm1 1501; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 1502; X86-AVX1-NEXT: vpxor LCPI10_0, %xmm0, %xmm0 1503; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1504; X86-AVX1-NEXT: vmovd %xmm0, %eax 1505; X86-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000 1506; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 1507; X86-AVX1-NEXT: vzeroupper 1508; X86-AVX1-NEXT: retl 1509; 1510; X86-AVX2-LABEL: test_reduce_v32i16: 1511; X86-AVX2: ## %bb.0: 1512; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0 1513; 
X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1514; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 1515; X86-AVX2-NEXT: vpxor LCPI10_0, %xmm0, %xmm0 1516; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1517; X86-AVX2-NEXT: vmovd %xmm0, %eax 1518; X86-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000 1519; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 1520; X86-AVX2-NEXT: vzeroupper 1521; X86-AVX2-NEXT: retl 1522; 1523; X64-SSE2-LABEL: test_reduce_v32i16: 1524; X64-SSE2: ## %bb.0: 1525; X64-SSE2-NEXT: pminsw %xmm3, %xmm1 1526; X64-SSE2-NEXT: pminsw %xmm2, %xmm1 1527; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 1528; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1529; X64-SSE2-NEXT: pminsw %xmm1, %xmm0 1530; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1531; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 1532; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1533; X64-SSE2-NEXT: psrld $16, %xmm0 1534; X64-SSE2-NEXT: pminsw %xmm1, %xmm0 1535; X64-SSE2-NEXT: movd %xmm0, %eax 1536; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1537; X64-SSE2-NEXT: retq 1538; 1539; X64-SSE42-LABEL: test_reduce_v32i16: 1540; X64-SSE42: ## %bb.0: 1541; X64-SSE42-NEXT: pminsw %xmm3, %xmm1 1542; X64-SSE42-NEXT: pminsw %xmm2, %xmm1 1543; X64-SSE42-NEXT: pminsw %xmm0, %xmm1 1544; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm1 1545; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 1546; X64-SSE42-NEXT: movd %xmm0, %eax 1547; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 1548; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1549; X64-SSE42-NEXT: retq 1550; 1551; X64-AVX1-LABEL: test_reduce_v32i16: 1552; X64-AVX1: ## %bb.0: 1553; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1554; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1555; X64-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2 1556; X64-AVX1-NEXT: vpminsw %xmm2, %xmm1, %xmm1 1557; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 1558; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 1559; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1560; X64-AVX1-NEXT: vmovd %xmm0, %eax 1561; 
X64-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000 1562; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax 1563; X64-AVX1-NEXT: vzeroupper 1564; X64-AVX1-NEXT: retq 1565; 1566; X64-AVX2-LABEL: test_reduce_v32i16: 1567; X64-AVX2: ## %bb.0: 1568; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0 1569; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1570; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 1571; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 1572; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1573; X64-AVX2-NEXT: vmovd %xmm0, %eax 1574; X64-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000 1575; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax 1576; X64-AVX2-NEXT: vzeroupper 1577; X64-AVX2-NEXT: retq 1578; 1579; X64-AVX512-LABEL: test_reduce_v32i16: 1580; X64-AVX512: ## %bb.0: 1581; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1582; X64-AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0 1583; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1584; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 1585; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 1586; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1587; X64-AVX512-NEXT: vmovd %xmm0, %eax 1588; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000 1589; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax 1590; X64-AVX512-NEXT: vzeroupper 1591; X64-AVX512-NEXT: retq 1592 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1593 %2 = icmp slt <32 x i16> %a0, %1 1594 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1 1595 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1596 %5 = icmp slt <32 x i16> %3, %4 1597 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4 1598 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1599 %8 = icmp slt <32 x i16> %6, %7 1600 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7 1601 %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1602 %11 = icmp slt <32 x i16> %9, %10 1603 %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10 1604 %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1605 %14 = icmp slt <32 x i16> %12, %13 1606 %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13 1607 %16 = extractelement <32 x i16> %15, i32 0 1608 ret i16 %16 1609} 1610 1611define i8 @test_reduce_v64i8(<64 x i8> %a0) { 1612; X86-SSE2-LABEL: test_reduce_v64i8: 1613; 
X86-SSE2: ## %bb.0: 1614; X86-SSE2-NEXT: movdqa %xmm2, %xmm4 1615; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm4 1616; X86-SSE2-NEXT: pand %xmm4, %xmm0 1617; X86-SSE2-NEXT: pandn %xmm2, %xmm4 1618; X86-SSE2-NEXT: por %xmm0, %xmm4 1619; X86-SSE2-NEXT: movdqa %xmm3, %xmm0 1620; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm0 1621; X86-SSE2-NEXT: pand %xmm0, %xmm1 1622; X86-SSE2-NEXT: pandn %xmm3, %xmm0 1623; X86-SSE2-NEXT: por %xmm1, %xmm0 1624; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1625; X86-SSE2-NEXT: pcmpgtb %xmm4, %xmm1 1626; X86-SSE2-NEXT: pand %xmm1, %xmm4 1627; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1628; X86-SSE2-NEXT: por %xmm4, %xmm1 1629; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1630; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1631; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1632; X86-SSE2-NEXT: pand %xmm2, %xmm1 1633; X86-SSE2-NEXT: pandn %xmm0, %xmm2 1634; X86-SSE2-NEXT: por %xmm1, %xmm2 1635; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1636; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1637; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 1638; X86-SSE2-NEXT: pand %xmm1, %xmm2 1639; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1640; X86-SSE2-NEXT: por %xmm2, %xmm1 1641; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 1642; X86-SSE2-NEXT: psrld $16, %xmm0 1643; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 1644; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1645; X86-SSE2-NEXT: pand %xmm2, %xmm1 1646; X86-SSE2-NEXT: pandn %xmm0, %xmm2 1647; X86-SSE2-NEXT: por %xmm1, %xmm2 1648; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 1649; X86-SSE2-NEXT: psrlw $8, %xmm0 1650; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1651; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 1652; X86-SSE2-NEXT: pand %xmm1, %xmm2 1653; X86-SSE2-NEXT: pandn %xmm0, %xmm1 1654; X86-SSE2-NEXT: por %xmm2, %xmm1 1655; X86-SSE2-NEXT: movd %xmm1, %eax 1656; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1657; X86-SSE2-NEXT: retl 1658; 1659; X86-SSE42-LABEL: test_reduce_v64i8: 1660; X86-SSE42: ## %bb.0: 1661; X86-SSE42-NEXT: pminsb %xmm3, %xmm1 1662; X86-SSE42-NEXT: pminsb %xmm2, %xmm1 1663; X86-SSE42-NEXT: 
pminsb %xmm0, %xmm1 1664; X86-SSE42-NEXT: pxor LCPI11_0, %xmm1 1665; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 1666; X86-SSE42-NEXT: psrlw $8, %xmm0 1667; X86-SSE42-NEXT: pminub %xmm1, %xmm0 1668; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1669; X86-SSE42-NEXT: movd %xmm0, %eax 1670; X86-SSE42-NEXT: xorb $-128, %al 1671; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1672; X86-SSE42-NEXT: retl 1673; 1674; X86-AVX1-LABEL: test_reduce_v64i8: 1675; X86-AVX1: ## %bb.0: 1676; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1677; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1678; X86-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2 1679; X86-AVX1-NEXT: vpminsb %xmm2, %xmm1, %xmm1 1680; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1681; X86-AVX1-NEXT: vpxor LCPI11_0, %xmm0, %xmm0 1682; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1683; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1684; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1685; X86-AVX1-NEXT: vmovd %xmm0, %eax 1686; X86-AVX1-NEXT: xorb $-128, %al 1687; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1688; X86-AVX1-NEXT: vzeroupper 1689; X86-AVX1-NEXT: retl 1690; 1691; X86-AVX2-LABEL: test_reduce_v64i8: 1692; X86-AVX2: ## %bb.0: 1693; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 1694; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1695; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1696; X86-AVX2-NEXT: vpxor LCPI11_0, %xmm0, %xmm0 1697; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 1698; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1699; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1700; X86-AVX2-NEXT: vmovd %xmm0, %eax 1701; X86-AVX2-NEXT: xorb $-128, %al 1702; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1703; X86-AVX2-NEXT: vzeroupper 1704; X86-AVX2-NEXT: retl 1705; 1706; X64-SSE2-LABEL: test_reduce_v64i8: 1707; X64-SSE2: ## %bb.0: 1708; X64-SSE2-NEXT: movdqa %xmm2, %xmm4 1709; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm4 1710; X64-SSE2-NEXT: pand %xmm4, %xmm0 1711; X64-SSE2-NEXT: pandn %xmm2, %xmm4 1712; X64-SSE2-NEXT: por %xmm0, %xmm4 1713; 
X64-SSE2-NEXT: movdqa %xmm3, %xmm0 1714; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm0 1715; X64-SSE2-NEXT: pand %xmm0, %xmm1 1716; X64-SSE2-NEXT: pandn %xmm3, %xmm0 1717; X64-SSE2-NEXT: por %xmm1, %xmm0 1718; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1719; X64-SSE2-NEXT: pcmpgtb %xmm4, %xmm1 1720; X64-SSE2-NEXT: pand %xmm1, %xmm4 1721; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1722; X64-SSE2-NEXT: por %xmm4, %xmm1 1723; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1724; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1725; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1726; X64-SSE2-NEXT: pand %xmm2, %xmm1 1727; X64-SSE2-NEXT: pandn %xmm0, %xmm2 1728; X64-SSE2-NEXT: por %xmm1, %xmm2 1729; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 1730; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1731; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 1732; X64-SSE2-NEXT: pand %xmm1, %xmm2 1733; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1734; X64-SSE2-NEXT: por %xmm2, %xmm1 1735; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 1736; X64-SSE2-NEXT: psrld $16, %xmm0 1737; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 1738; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1739; X64-SSE2-NEXT: pand %xmm2, %xmm1 1740; X64-SSE2-NEXT: pandn %xmm0, %xmm2 1741; X64-SSE2-NEXT: por %xmm1, %xmm2 1742; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 1743; X64-SSE2-NEXT: psrlw $8, %xmm0 1744; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1745; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 1746; X64-SSE2-NEXT: pand %xmm1, %xmm2 1747; X64-SSE2-NEXT: pandn %xmm0, %xmm1 1748; X64-SSE2-NEXT: por %xmm2, %xmm1 1749; X64-SSE2-NEXT: movd %xmm1, %eax 1750; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 1751; X64-SSE2-NEXT: retq 1752; 1753; X64-SSE42-LABEL: test_reduce_v64i8: 1754; X64-SSE42: ## %bb.0: 1755; X64-SSE42-NEXT: pminsb %xmm3, %xmm1 1756; X64-SSE42-NEXT: pminsb %xmm2, %xmm1 1757; X64-SSE42-NEXT: pminsb %xmm0, %xmm1 1758; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm1 1759; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 1760; X64-SSE42-NEXT: psrlw $8, %xmm0 1761; X64-SSE42-NEXT: pminub %xmm1, %xmm0 1762; X64-SSE42-NEXT: phminposuw 
%xmm0, %xmm0 1763; X64-SSE42-NEXT: movd %xmm0, %eax 1764; X64-SSE42-NEXT: xorb $-128, %al 1765; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 1766; X64-SSE42-NEXT: retq 1767; 1768; X64-AVX1-LABEL: test_reduce_v64i8: 1769; X64-AVX1: ## %bb.0: 1770; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1771; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1772; X64-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2 1773; X64-AVX1-NEXT: vpminsb %xmm2, %xmm1, %xmm1 1774; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1775; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 1776; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 1777; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1778; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 1779; X64-AVX1-NEXT: vmovd %xmm0, %eax 1780; X64-AVX1-NEXT: xorb $-128, %al 1781; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax 1782; X64-AVX1-NEXT: vzeroupper 1783; X64-AVX1-NEXT: retq 1784; 1785; X64-AVX2-LABEL: test_reduce_v64i8: 1786; X64-AVX2: ## %bb.0: 1787; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 1788; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 1789; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1790; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 1791; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 1792; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 1793; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 1794; X64-AVX2-NEXT: vmovd %xmm0, %eax 1795; X64-AVX2-NEXT: xorb $-128, %al 1796; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax 1797; X64-AVX2-NEXT: vzeroupper 1798; X64-AVX2-NEXT: retq 1799; 1800; X64-AVX512-LABEL: test_reduce_v64i8: 1801; X64-AVX512: ## %bb.0: 1802; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 1803; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0 1804; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 1805; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1806; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 1807; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 1808; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 1809; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 1810; 
X64-AVX512-NEXT: vmovd %xmm0, %eax 1811; X64-AVX512-NEXT: xorb $-128, %al 1812; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax 1813; X64-AVX512-NEXT: vzeroupper 1814; X64-AVX512-NEXT: retq 1815 %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1816 %2 = icmp slt <64 x i8> %a0, %1 1817 %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1 1818 %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1819 %5 = icmp slt <64 x i8> %3, %4 1820 %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4 1821 %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1822 %8 = icmp slt <64 x i8> %6, %7 1823 %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7 1824 %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1825 %11 = icmp slt <64 x i8> %9, %10 1826 %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10 1827 %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1828 %14 = icmp slt <64 x i8> %12, %13 1829 %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13 1830 %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1831 %17 = icmp slt <64 x i8> %15, %16 1832 %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16 1833 %19 = extractelement <64 x i8> %18, i32 0 1834 ret i8 %19 1835} 1836 1837; 1838; Partial Vector Reductions 1839; 1840 1841define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) { 1842; X86-SSE2-LABEL: test_reduce_v16i16_v8i16: 1843; X86-SSE2: ## %bb.0: 1844; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1845; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 1846; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1847; X86-SSE2-NEXT: pminsw %xmm1, %xmm0 1848; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1849; X86-SSE2-NEXT: psrld $16, %xmm1 1850; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 1851; X86-SSE2-NEXT: movd %xmm1, %eax 1852; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1853; X86-SSE2-NEXT: retl 1854; 1855; X86-SSE42-LABEL: test_reduce_v16i16_v8i16: 1856; X86-SSE42: ## 
%bb.0: 1857; X86-SSE42-NEXT: pxor LCPI12_0, %xmm0 1858; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1859; X86-SSE42-NEXT: movd %xmm0, %eax 1860; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 1861; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1862; X86-SSE42-NEXT: retl 1863; 1864; X86-AVX-LABEL: test_reduce_v16i16_v8i16: 1865; X86-AVX: ## %bb.0: 1866; X86-AVX-NEXT: vpxor LCPI12_0, %xmm0, %xmm0 1867; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 1868; X86-AVX-NEXT: vmovd %xmm0, %eax 1869; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000 1870; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 1871; X86-AVX-NEXT: vzeroupper 1872; X86-AVX-NEXT: retl 1873; 1874; X64-SSE2-LABEL: test_reduce_v16i16_v8i16: 1875; X64-SSE2: ## %bb.0: 1876; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1877; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 1878; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1879; X64-SSE2-NEXT: pminsw %xmm1, %xmm0 1880; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1881; X64-SSE2-NEXT: psrld $16, %xmm1 1882; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 1883; X64-SSE2-NEXT: movd %xmm1, %eax 1884; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1885; X64-SSE2-NEXT: retq 1886; 1887; X64-SSE42-LABEL: test_reduce_v16i16_v8i16: 1888; X64-SSE42: ## %bb.0: 1889; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 1890; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1891; X64-SSE42-NEXT: movd %xmm0, %eax 1892; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 1893; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1894; X64-SSE42-NEXT: retq 1895; 1896; X64-AVX-LABEL: test_reduce_v16i16_v8i16: 1897; X64-AVX: ## %bb.0: 1898; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 1899; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 1900; X64-AVX-NEXT: vmovd %xmm0, %eax 1901; X64-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000 1902; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 1903; X64-AVX-NEXT: vzeroupper 1904; X64-AVX-NEXT: retq 1905 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 
4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1906 %2 = icmp slt <16 x i16> %a0, %1 1907 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1 1908 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1909 %5 = icmp slt <16 x i16> %3, %4 1910 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 1911 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1912 %8 = icmp slt <16 x i16> %6, %7 1913 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 1914 %10 = extractelement <16 x i16> %9, i32 0 1915 ret i16 %10 1916} 1917 1918define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) { 1919; X86-SSE2-LABEL: test_reduce_v32i16_v8i16: 1920; X86-SSE2: ## %bb.0: 1921; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1922; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 1923; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1924; X86-SSE2-NEXT: pminsw %xmm1, %xmm0 1925; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 1926; X86-SSE2-NEXT: psrld $16, %xmm1 1927; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 1928; X86-SSE2-NEXT: movd %xmm1, %eax 1929; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1930; X86-SSE2-NEXT: retl 1931; 1932; X86-SSE42-LABEL: test_reduce_v32i16_v8i16: 1933; X86-SSE42: ## %bb.0: 1934; X86-SSE42-NEXT: pxor LCPI13_0, %xmm0 1935; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 1936; X86-SSE42-NEXT: movd %xmm0, %eax 1937; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 1938; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1939; X86-SSE42-NEXT: retl 1940; 1941; X86-AVX-LABEL: test_reduce_v32i16_v8i16: 1942; 
X86-AVX: ## %bb.0: 1943; X86-AVX-NEXT: vpxor LCPI13_0, %xmm0, %xmm0 1944; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 1945; X86-AVX-NEXT: vmovd %xmm0, %eax 1946; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000 1947; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 1948; X86-AVX-NEXT: vzeroupper 1949; X86-AVX-NEXT: retl 1950; 1951; X64-SSE2-LABEL: test_reduce_v32i16_v8i16: 1952; X64-SSE2: ## %bb.0: 1953; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1954; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 1955; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] 1956; X64-SSE2-NEXT: pminsw %xmm1, %xmm0 1957; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 1958; X64-SSE2-NEXT: psrld $16, %xmm1 1959; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 1960; X64-SSE2-NEXT: movd %xmm1, %eax 1961; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax 1962; X64-SSE2-NEXT: retq 1963; 1964; X64-SSE42-LABEL: test_reduce_v32i16_v8i16: 1965; X64-SSE42: ## %bb.0: 1966; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 1967; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 1968; X64-SSE42-NEXT: movd %xmm0, %eax 1969; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 1970; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax 1971; X64-SSE42-NEXT: retq 1972; 1973; X64-AVX-LABEL: test_reduce_v32i16_v8i16: 1974; X64-AVX: ## %bb.0: 1975; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 1976; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 1977; X64-AVX-NEXT: vmovd %xmm0, %eax 1978; X64-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000 1979; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax 1980; X64-AVX-NEXT: vzeroupper 1981; X64-AVX-NEXT: retq 1982 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1983 
%2 = icmp slt <32 x i16> %a0, %1 1984 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1 1985 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1986 %5 = icmp slt <32 x i16> %3, %4 1987 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4 1988 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1989 %8 = icmp slt <32 x i16> %6, %7 1990 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7 1991 %10 = extractelement <32 x i16> %9, i32 0 1992 ret i16 %10 1993} 1994 1995define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) { 1996; X86-SSE2-LABEL: test_reduce_v32i8_v16i8: 1997; X86-SSE2: ## %bb.0: 1998; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1999; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 2000; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 2001; X86-SSE2-NEXT: pand %xmm2, %xmm0 2002; X86-SSE2-NEXT: pandn %xmm1, %xmm2 2003; X86-SSE2-NEXT: por %xmm0, %xmm2 2004; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 2005; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 2006; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 2007; X86-SSE2-NEXT: pand %xmm1, %xmm2 2008; X86-SSE2-NEXT: pandn %xmm0, %xmm1 2009; X86-SSE2-NEXT: por %xmm2, %xmm1 2010; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 2011; X86-SSE2-NEXT: psrld $16, %xmm0 2012; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 2013; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 
2014; X86-SSE2-NEXT: pand %xmm2, %xmm1 2015; X86-SSE2-NEXT: pandn %xmm0, %xmm2 2016; X86-SSE2-NEXT: por %xmm1, %xmm2 2017; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 2018; X86-SSE2-NEXT: psrlw $8, %xmm0 2019; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 2020; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 2021; X86-SSE2-NEXT: pand %xmm1, %xmm2 2022; X86-SSE2-NEXT: pandn %xmm0, %xmm1 2023; X86-SSE2-NEXT: por %xmm2, %xmm1 2024; X86-SSE2-NEXT: movd %xmm1, %eax 2025; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax 2026; X86-SSE2-NEXT: retl 2027; 2028; X86-SSE42-LABEL: test_reduce_v32i8_v16i8: 2029; X86-SSE42: ## %bb.0: 2030; X86-SSE42-NEXT: pxor LCPI14_0, %xmm0 2031; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 2032; X86-SSE42-NEXT: psrlw $8, %xmm1 2033; X86-SSE42-NEXT: pminub %xmm0, %xmm1 2034; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 2035; X86-SSE42-NEXT: movd %xmm0, %eax 2036; X86-SSE42-NEXT: xorb $-128, %al 2037; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax 2038; X86-SSE42-NEXT: retl 2039; 2040; X86-AVX-LABEL: test_reduce_v32i8_v16i8: 2041; X86-AVX: ## %bb.0: 2042; X86-AVX-NEXT: vpxor LCPI14_0, %xmm0, %xmm0 2043; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 2044; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 2045; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 2046; X86-AVX-NEXT: vmovd %xmm0, %eax 2047; X86-AVX-NEXT: xorb $-128, %al 2048; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax 2049; X86-AVX-NEXT: vzeroupper 2050; X86-AVX-NEXT: retl 2051; 2052; X64-SSE2-LABEL: test_reduce_v32i8_v16i8: 2053; X64-SSE2: ## %bb.0: 2054; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 2055; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 2056; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 2057; X64-SSE2-NEXT: pand %xmm2, %xmm0 2058; X64-SSE2-NEXT: pandn %xmm1, %xmm2 2059; X64-SSE2-NEXT: por %xmm0, %xmm2 2060; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] 2061; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 2062; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 2063; X64-SSE2-NEXT: pand %xmm1, %xmm2 2064; X64-SSE2-NEXT: pandn %xmm0, %xmm1 2065; 
X64-SSE2-NEXT: por %xmm2, %xmm1 2066; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 2067; X64-SSE2-NEXT: psrld $16, %xmm0 2068; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 2069; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 2070; X64-SSE2-NEXT: pand %xmm2, %xmm1 2071; X64-SSE2-NEXT: pandn %xmm0, %xmm2 2072; X64-SSE2-NEXT: por %xmm1, %xmm2 2073; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 2074; X64-SSE2-NEXT: psrlw $8, %xmm0 2075; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 2076; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 2077; X64-SSE2-NEXT: pand %xmm1, %xmm2 2078; X64-SSE2-NEXT: pandn %xmm0, %xmm1 2079; X64-SSE2-NEXT: por %xmm2, %xmm1 2080; X64-SSE2-NEXT: movd %xmm1, %eax 2081; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax 2082; X64-SSE2-NEXT: retq 2083; 2084; X64-SSE42-LABEL: test_reduce_v32i8_v16i8: 2085; X64-SSE42: ## %bb.0: 2086; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 2087; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 2088; X64-SSE42-NEXT: psrlw $8, %xmm1 2089; X64-SSE42-NEXT: pminub %xmm0, %xmm1 2090; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 2091; X64-SSE42-NEXT: movd %xmm0, %eax 2092; X64-SSE42-NEXT: xorb $-128, %al 2093; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax 2094; X64-SSE42-NEXT: retq 2095; 2096; X64-AVX-LABEL: test_reduce_v32i8_v16i8: 2097; X64-AVX: ## %bb.0: 2098; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 2099; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 2100; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 2101; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 2102; X64-AVX-NEXT: vmovd %xmm0, %eax 2103; X64-AVX-NEXT: xorb $-128, %al 2104; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax 2105; X64-AVX-NEXT: vzeroupper 2106; X64-AVX-NEXT: retq 2107 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef>
  %2 = icmp slt <32 x i8> %a0, %1
  %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <32 x i8> %3, %4
  %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
  %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <32 x i8> %6, %7
  %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
  %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <32 x i8> %9, %10
  %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
  %13 = extractelement <32 x i8> %12, i32 0
  ret i8 %13
}

; Reduces the low <16 x i8> subvector of a <64 x i8> argument to its signed
; minimum (icmp slt + select at each step picks the smaller operand; the
; shuffle masks only ever read lanes 8..15, 4..7, 2..3, 1, so the upper 48
; lanes are dead).  Checks the codegen per subtarget:
;  - SSE2: each halving step is the pcmpgtb / pand / pandn / por select idiom.
;  - SSE4.2 / AVX: smin is rewritten as an unsigned min by flipping the sign
;    bit (pxor with a constant-pool operand — presumably an 0x80 byte splat,
;    the usual smin-via-umin trick; the final `xorb $-128` undoing it supports
;    that), then psrlw+pminub pairs bytes and phminposuw finds the minimum.
; NOTE: the CHECK lines are autogenerated by update_llc_test_checks.py — do
; not edit them by hand; rerun the script after any codegen change.
define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
; X86-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE2:       ## %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm0
; X86-SSE2-NEXT:    pandn %xmm1, %xmm2
; X86-SSE2-NEXT:    por %xmm0, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    psrld $16, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X86-SSE2-NEXT:    pand %xmm2, %xmm1
; X86-SSE2-NEXT:    pandn %xmm0, %xmm2
; X86-SSE2-NEXT:    por %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE2-NEXT:    psrlw $8, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X86-SSE2-NEXT:    pand %xmm1, %xmm2
; X86-SSE2-NEXT:    pandn %xmm0, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    movd %xmm1, %eax
; X86-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE2-NEXT:    retl
;
; X86-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X86-SSE42:       ## %bb.0:
; X86-SSE42-NEXT:    pxor LCPI15_0, %xmm0
; X86-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE42-NEXT:    psrlw $8, %xmm1
; X86-SSE42-NEXT:    pminub %xmm0, %xmm1
; X86-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X86-SSE42-NEXT:    movd %xmm0, %eax
; X86-SSE42-NEXT:    xorb $-128, %al
; X86-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X86-SSE42-NEXT:    retl
;
; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpxor LCPI15_0, %xmm0, %xmm0
; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X86-AVX-NEXT:    vmovd %xmm0, %eax
; X86-AVX-NEXT:    xorb $-128, %al
; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-SSE2-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE2:       ## %bb.0:
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm0, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm0
; X64-SSE2-NEXT:    pandn %xmm1, %xmm2
; X64-SSE2-NEXT:    por %xmm0, %xmm2
; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X64-SSE2-NEXT:    psrld $16, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X64-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
; X64-SSE2-NEXT:    pand %xmm2, %xmm1
; X64-SSE2-NEXT:    pandn %xmm0, %xmm2
; X64-SSE2-NEXT:    por %xmm1, %xmm2
; X64-SSE2-NEXT:    movdqa %xmm2, %xmm0
; X64-SSE2-NEXT:    psrlw $8, %xmm0
; X64-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE2-NEXT:    pcmpgtb %xmm2, %xmm1
; X64-SSE2-NEXT:    pand %xmm1, %xmm2
; X64-SSE2-NEXT:    pandn %xmm0, %xmm1
; X64-SSE2-NEXT:    por %xmm2, %xmm1
; X64-SSE2-NEXT:    movd %xmm1, %eax
; X64-SSE2-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE2-NEXT:    retq
;
; X64-SSE42-LABEL: test_reduce_v64i8_v16i8:
; X64-SSE42:       ## %bb.0:
; X64-SSE42-NEXT:    pxor {{.*}}(%rip), %xmm0
; X64-SSE42-NEXT:    movdqa %xmm0, %xmm1
; X64-SSE42-NEXT:    psrlw $8, %xmm1
; X64-SSE42-NEXT:    pminub %xmm0, %xmm1
; X64-SSE42-NEXT:    phminposuw %xmm1, %xmm0
; X64-SSE42-NEXT:    movd %xmm0, %eax
; X64-SSE42-NEXT:    xorb $-128, %al
; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
; X64-SSE42-NEXT:    retq
;
; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
; X64-AVX-NEXT:    vmovd %xmm0, %eax
; X64-AVX-NEXT:    xorb $-128, %al
; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
; log2-step shuffle ladder: lanes 8..15, then 4..7, 2..3, 1 are folded into
; lane 0 with slt/select (smin); all other mask lanes are undef.
  %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = icmp slt <64 x i8> %a0, %1
  %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = icmp slt <64 x i8> %3, %4
  %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
  %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %8 = icmp slt <64 x i8> %6, %7
  %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
  %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %11 = icmp slt <64 x i8> %9, %10
  %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
  %13 = extractelement <64 x i8> %12, i32 0
  ret i8 %13
}