; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE(review): every CHECK line below is machine-generated. Do not edit them
; by hand; regenerate with utils/update_llc_test_checks.py so they stay in
; sync with llc output for each RUN configuration.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW

;
; Unsigned Maximum (GT)
;

define <2 x i64> @max_gt_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: max_gt_v2i64:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: pxor %xmm2, %xmm3
; SSE2-NEXT: pxor %xmm0, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT: por %xmm2, %xmm3
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm3
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_gt_v2i64:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: pxor %xmm0, %xmm3
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm5, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm2, %xmm1
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_gt_v2i64:
; SSE42: # BB#0:
; SSE42-NEXT: movdqa %xmm0, %xmm2
; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT: movdqa %xmm1, %xmm3
; SSE42-NEXT: pxor %xmm0, %xmm3
; SSE42-NEXT: pxor %xmm2, %xmm0
; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
; SSE42-NEXT: blendvpd %xmm2, %xmm1
; SSE42-NEXT: movapd %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX-LABEL: max_gt_v2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm3
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm2
; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = icmp ugt <2 x i64> %a, %b
  %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %2
}

define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: max_gt_v4i64:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: pxor %xmm4, %xmm5
; SSE2-NEXT: movdqa %xmm1, %xmm6
; SSE2-NEXT: pxor %xmm4, %xmm6
; SSE2-NEXT: movdqa %xmm6, %xmm7
; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
; SSE2-NEXT: pand %xmm8, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
; SSE2-NEXT: por %xmm5, %xmm6
; SSE2-NEXT: movdqa %xmm2, %xmm5
; SSE2-NEXT: pxor %xmm4, %xmm5
; SSE2-NEXT: pxor %xmm0, %xmm4
; SSE2-NEXT: movdqa %xmm4, %xmm7
; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm5, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE2-NEXT: pand %xmm8, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; SSE2-NEXT: por %xmm4, %xmm5
; SSE2-NEXT: pand %xmm5, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm5
; SSE2-NEXT: por %xmm5, %xmm0
; SSE2-NEXT: pand %xmm6, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm6
; SSE2-NEXT: por %xmm6, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_gt_v4i64:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm8
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
; SSE41-NEXT: movdqa %xmm3, %xmm5
; SSE41-NEXT: pxor %xmm0, %xmm5
; SSE41-NEXT: movdqa %xmm1, %xmm6
; SSE41-NEXT: pxor %xmm0, %xmm6
; SSE41-NEXT: movdqa %xmm6, %xmm7
; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE41-NEXT: pand %xmm4, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; SSE41-NEXT: por %xmm6, %xmm5
; SSE41-NEXT: movdqa %xmm2, %xmm4
; SSE41-NEXT: pxor %xmm0, %xmm4
; SSE41-NEXT: pxor %xmm8, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm6
; SSE41-NEXT: pcmpgtd %xmm4, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm7, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: blendvpd %xmm8, %xmm2
; SSE41-NEXT: movdqa %xmm5, %xmm0
; SSE41-NEXT: blendvpd %xmm1, %xmm3
; SSE41-NEXT: movapd %xmm2, %xmm0
; SSE41-NEXT: movapd %xmm3, %xmm1
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_gt_v4i64:
; SSE42: # BB#0:
; SSE42-NEXT: movdqa %xmm0, %xmm4
; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT: movdqa %xmm3, %xmm6
; SSE42-NEXT: pxor %xmm0, %xmm6
; SSE42-NEXT: movdqa %xmm1, %xmm5
; SSE42-NEXT: pxor %xmm0, %xmm5
; SSE42-NEXT: pcmpgtq %xmm6, %xmm5
; SSE42-NEXT: movdqa %xmm2, %xmm6
; SSE42-NEXT: pxor %xmm0, %xmm6
; SSE42-NEXT: pxor %xmm4, %xmm0
; SSE42-NEXT: pcmpgtq %xmm6, %xmm0
; SSE42-NEXT: blendvpd %xmm4, %xmm2
; SSE42-NEXT: movdqa %xmm5, %xmm0
; SSE42-NEXT: blendvpd %xmm1, %xmm3
; SSE42-NEXT: movapd %xmm2, %xmm0
; SSE42-NEXT: movapd %xmm3, %xmm1
; SSE42-NEXT: retq
;
; AVX1-LABEL: max_gt_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm4
; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: max_gt_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: max_gt_v4i64:
; AVX512: # BB#0:
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm3
; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm2
; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
  %1 = icmp ugt <4 x i64> %a, %b
  %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
  ret <4 x i64> %2
}

define <4 x i32> @max_gt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: max_gt_v4i32:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: pxor %xmm2, %xmm3
; SSE2-NEXT: pxor %xmm0, %xmm2
; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_gt_v4i32:
; SSE41: # BB#0:
; SSE41-NEXT: pmaxud %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_gt_v4i32:
; SSE42: # BB#0:
; SSE42-NEXT: pmaxud %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX-LABEL: max_gt_v4i32:
; AVX: # BB#0:
; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = icmp ugt <4 x i32> %a, %b
  %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %2
}

define <8 x i32> @max_gt_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE2-LABEL: max_gt_v8i32:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm3, %xmm6
; SSE2-NEXT: pxor %xmm5, %xmm6
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: pxor %xmm5, %xmm4
; SSE2-NEXT: pcmpgtd %xmm6, %xmm4
; SSE2-NEXT: movdqa %xmm2, %xmm6
; SSE2-NEXT: pxor %xmm5, %xmm6
; SSE2-NEXT: pxor %xmm0, %xmm5
; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
; SSE2-NEXT: pand %xmm5, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm5
; SSE2-NEXT: por %xmm5, %xmm0
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm4
; SSE2-NEXT: por %xmm1, %xmm4
; SSE2-NEXT: movdqa %xmm4, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_gt_v8i32:
; SSE41: # BB#0:
; SSE41-NEXT: pmaxud %xmm2, %xmm0
; SSE41-NEXT: pmaxud %xmm3, %xmm1
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_gt_v8i32:
; SSE42: # BB#0:
; SSE42-NEXT: pmaxud %xmm2, %xmm0
; SSE42-NEXT: pmaxud %xmm3, %xmm1
; SSE42-NEXT: retq
;
; AVX1-LABEL: max_gt_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: max_gt_v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: max_gt_v8i32:
; AVX512: # BB#0:
; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %1 = icmp ugt <8 x i32> %a, %b
  %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
  ret <8 x i32> %2
}

define <8 x i16> @max_gt_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: max_gt_v8i16:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: pxor %xmm2, %xmm3
; SSE2-NEXT: pxor %xmm0, %xmm2
; SSE2-NEXT: pcmpgtw %xmm3, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_gt_v8i16:
; SSE41: # BB#0:
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_gt_v8i16:
; SSE42: # BB#0:
; SSE42-NEXT: pmaxuw %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX-LABEL: max_gt_v8i16:
; AVX: # BB#0:
; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = icmp ugt <8 x i16> %a, %b
  %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %2
}

define <16 x i16> @max_gt_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE2-LABEL: max_gt_v16i16:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [32768,32768,32768,32768,32768,32768,32768,32768]
; SSE2-NEXT: movdqa %xmm3, %xmm6
; SSE2-NEXT: pxor %xmm5, %xmm6
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: pxor %xmm5, %xmm4
; SSE2-NEXT: pcmpgtw %xmm6, %xmm4
; SSE2-NEXT: movdqa %xmm2, %xmm6
; SSE2-NEXT: pxor %xmm5, %xmm6
; SSE2-NEXT: pxor %xmm0, %xmm5
; SSE2-NEXT: pcmpgtw %xmm6, %xmm5
; SSE2-NEXT: pand %xmm5, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm5
; SSE2-NEXT: por %xmm5, %xmm0
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm4
; SSE2-NEXT: por %xmm1, %xmm4
; SSE2-NEXT: movdqa %xmm4, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_gt_v16i16:
; SSE41: # BB#0:
; SSE41-NEXT: pmaxuw %xmm2, %xmm0
; SSE41-NEXT: pmaxuw %xmm3, %xmm1
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_gt_v16i16:
; SSE42: # BB#0:
; SSE42-NEXT: pmaxuw %xmm2, %xmm0
; SSE42-NEXT: pmaxuw %xmm3, %xmm1
; SSE42-NEXT: retq
;
; AVX1-LABEL: max_gt_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: max_gt_v16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: max_gt_v16i16:
; AVX512: # BB#0:
; AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %1 = icmp ugt <16 x i16> %a, %b
  %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
  ret <16 x i16> %2
}

define <16 x i8> @max_gt_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: max_gt_v16i8:
; SSE: # BB#0:
; SSE-NEXT: pmaxub %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: max_gt_v16i8:
; AVX: # BB#0:
; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = icmp ugt <16 x i8> %a, %b
  %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %2
}

define <32 x i8> @max_gt_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: max_gt_v32i8:
; SSE: # BB#0:
; SSE-NEXT: pmaxub %xmm2, %xmm0
; SSE-NEXT: pmaxub %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: max_gt_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: max_gt_v32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: max_gt_v32i8:
; AVX512: # BB#0:
; AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %1 = icmp ugt <32 x i8> %a, %b
  %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
  ret <32 x i8> %2
}

;
; Unsigned Maximum (GE)
;

define <2 x i64> @max_ge_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: max_ge_v2i64:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm2, %xmm3
; SSE2-NEXT: pxor %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT: por %xmm2, %xmm3
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm3, %xmm2
; SSE2-NEXT: pandn %xmm0, %xmm3
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm3, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_ge_v2i64:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
; SSE41-NEXT: movdqa %xmm2, %xmm3
; SSE41-NEXT: pxor %xmm0, %xmm3
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm5, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE41-NEXT: por %xmm0, %xmm3
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm2, %xmm1
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_ge_v2i64:
; SSE42: # BB#0:
; SSE42-NEXT: movdqa %xmm0, %xmm2
; SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT: pxor %xmm3, %xmm0
; SSE42-NEXT: pxor %xmm1, %xmm3
; SSE42-NEXT: pcmpgtq %xmm0, %xmm3
; SSE42-NEXT: pcmpeqd %xmm0, %xmm0
; SSE42-NEXT: pxor %xmm3, %xmm0
; SSE42-NEXT: blendvpd %xmm2, %xmm1
; SSE42-NEXT: movapd %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX-LABEL: max_ge_v2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm2
; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = icmp uge <2 x i64> %a, %b
  %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %2
}

define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: max_ge_v4i64:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: pxor %xmm7, %xmm4
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: pxor %xmm7, %xmm5
; SSE2-NEXT: movdqa %xmm5, %xmm6
; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
; SSE2-NEXT: pand %xmm8, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3]
; SSE2-NEXT: por %xmm4, %xmm8
; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
; SSE2-NEXT: movdqa %xmm8, %xmm9
; SSE2-NEXT: pxor %xmm4, %xmm9
; SSE2-NEXT: movdqa %xmm0, %xmm6
; SSE2-NEXT: pxor %xmm7, %xmm6
; SSE2-NEXT: pxor %xmm2, %xmm7
; SSE2-NEXT: movdqa %xmm7, %xmm5
; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
; SSE2-NEXT: pand %xmm10, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-NEXT: por %xmm6, %xmm5
; SSE2-NEXT: pxor %xmm5, %xmm4
; SSE2-NEXT: pandn %xmm0, %xmm5
; SSE2-NEXT: pandn %xmm2, %xmm4
; SSE2-NEXT: por %xmm5, %xmm4
; SSE2-NEXT: pandn %xmm1, %xmm8
; SSE2-NEXT: pandn %xmm3, %xmm9
; SSE2-NEXT: por %xmm8, %xmm9
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm9, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_ge_v4i64:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm8
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
; SSE41-NEXT: movdqa %xmm1, %xmm5
; SSE41-NEXT: pxor %xmm0, %xmm5
; SSE41-NEXT: movdqa %xmm3, %xmm6
; SSE41-NEXT: pxor %xmm0, %xmm6
; SSE41-NEXT: movdqa %xmm6, %xmm7
; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE41-NEXT: pand %xmm4, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; SSE41-NEXT: por %xmm6, %xmm5
; SSE41-NEXT: pcmpeqd %xmm9, %xmm9
; SSE41-NEXT: pxor %xmm9, %xmm5
; SSE41-NEXT: movdqa %xmm8, %xmm6
; SSE41-NEXT: pxor %xmm0, %xmm6
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm7
; SSE41-NEXT: pcmpgtd %xmm6, %xmm7
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm6, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm4, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
; SSE41-NEXT: por %xmm6, %xmm0
; SSE41-NEXT: pxor %xmm9, %xmm0
; SSE41-NEXT: blendvpd %xmm8, %xmm2
; SSE41-NEXT: movdqa %xmm5, %xmm0
; SSE41-NEXT: blendvpd %xmm1, %xmm3
; SSE41-NEXT: movapd %xmm2, %xmm0
; SSE41-NEXT: movapd %xmm3, %xmm1
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_ge_v4i64:
; SSE42: # BB#0:
; SSE42-NEXT: movdqa %xmm0, %xmm4
; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT: movdqa %xmm1, %xmm6
; SSE42-NEXT: pxor %xmm0, %xmm6
; SSE42-NEXT: movdqa %xmm3, %xmm5
; SSE42-NEXT: pxor %xmm0, %xmm5
; SSE42-NEXT: pcmpgtq %xmm6, %xmm5
; SSE42-NEXT: pcmpeqd %xmm6, %xmm6
; SSE42-NEXT: pxor %xmm6, %xmm5
; SSE42-NEXT: movdqa %xmm4, %xmm7
; SSE42-NEXT: pxor %xmm0, %xmm7
; SSE42-NEXT: pxor %xmm2, %xmm0
; SSE42-NEXT: pcmpgtq %xmm7, %xmm0
; SSE42-NEXT: pxor %xmm6, %xmm0
; SSE42-NEXT: blendvpd %xmm4, %xmm2
; SSE42-NEXT: movdqa %xmm5, %xmm0
; SSE42-NEXT: blendvpd %xmm1, %xmm3
; SSE42-NEXT: movapd %xmm2, %xmm0
; SSE42-NEXT: movapd %xmm3, %xmm1
; SSE42-NEXT: retq
;
; AVX1-LABEL: max_ge_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm5
; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: max_ge_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: max_ge_v4i64:
; AVX512: # BB#0:
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm3
; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm2
; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2
; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
  %1 = icmp uge <4 x i64> %a, %b
  %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
  ret <4 x i64> %2
}

define <4 x i32> @max_ge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: max_ge_v4i32:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pxor %xmm3, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm3
; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm3, %xmm2
; SSE2-NEXT: pandn %xmm0, %xmm3
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm3, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_ge_v4i32:
; SSE41: # BB#0:
; SSE41-NEXT: pmaxud %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_ge_v4i32:
; SSE42: # BB#0:
; SSE42-NEXT: pmaxud %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX-LABEL: max_ge_v4i32:
; AVX: # BB#0:
; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = icmp uge <4 x i32> %a, %b
  %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %2
}

define <8 x i32> @max_ge_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE2-LABEL: max_ge_v8i32:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: pxor %xmm6, %xmm4
; SSE2-NEXT: movdqa %xmm3, %xmm7
; SSE2-NEXT: pxor %xmm6, %xmm7
; SSE2-NEXT: pcmpgtd %xmm4, %xmm7
; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
; SSE2-NEXT: movdqa %xmm7, %xmm5
; SSE2-NEXT: pxor %xmm4, %xmm5
; SSE2-NEXT: movdqa %xmm0, %xmm8
; SSE2-NEXT: pxor %xmm6, %xmm8
; SSE2-NEXT: pxor %xmm2, %xmm6
; SSE2-NEXT: pcmpgtd %xmm8, %xmm6
; SSE2-NEXT: pxor %xmm6, %xmm4
; SSE2-NEXT: pandn %xmm0, %xmm6
; SSE2-NEXT: pandn %xmm2, %xmm4
; SSE2-NEXT: por %xmm6, %xmm4
; SSE2-NEXT: pandn %xmm1, %xmm7
; SSE2-NEXT: pandn %xmm3, %xmm5
; SSE2-NEXT: por %xmm7, %xmm5
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm5, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_ge_v8i32:
; SSE41: # BB#0:
; SSE41-NEXT: pmaxud %xmm2, %xmm0
; SSE41-NEXT: pmaxud %xmm3, %xmm1
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_ge_v8i32:
; SSE42: # BB#0:
; SSE42-NEXT: pmaxud %xmm2, %xmm0
; SSE42-NEXT: pmaxud %xmm3, %xmm1
; SSE42-NEXT: retq
;
; AVX1-LABEL: max_ge_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpmaxud %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: max_ge_v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: max_ge_v8i32:
; AVX512: # BB#0:
; AVX512-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %1 = icmp uge <8 x i32> %a, %b
  %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b
  ret <8 x i32> %2
}

define <8 x i16> @max_ge_v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: max_ge_v8i16:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psubusw %xmm0, %xmm2
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: pcmpeqw %xmm2, %xmm3
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm3
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_ge_v8i16:
; SSE41: # BB#0:
; SSE41-NEXT: pmaxuw %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_ge_v8i16:
; SSE42: # BB#0:
; SSE42-NEXT: pmaxuw %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX-LABEL: max_ge_v8i16:
; AVX: # BB#0:
; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = icmp uge <8 x i16> %a, %b
  %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %2
}

define <16 x i16> @max_ge_v16i16(<16 x i16> %a, <16 x i16> %b) {
; SSE2-LABEL: max_ge_v16i16:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: psubusw %xmm1, %xmm4
; SSE2-NEXT: pxor %xmm5, %xmm5
; SSE2-NEXT: pcmpeqw %xmm5, %xmm4
; SSE2-NEXT: movdqa %xmm2, %xmm6
; SSE2-NEXT: psubusw %xmm0, %xmm6
; SSE2-NEXT: pcmpeqw %xmm5, %xmm6
; SSE2-NEXT: pand %xmm6, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm6
; SSE2-NEXT: por %xmm6, %xmm0
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm4
; SSE2-NEXT: por %xmm4, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: max_ge_v16i16:
; SSE41: # BB#0:
; SSE41-NEXT: pmaxuw %xmm2, %xmm0
; SSE41-NEXT: pmaxuw %xmm3, %xmm1
; SSE41-NEXT: retq
;
; SSE42-LABEL: max_ge_v16i16:
; SSE42: # BB#0:
; SSE42-NEXT: pmaxuw %xmm2, %xmm0
; SSE42-NEXT: pmaxuw %xmm3, %xmm1
; SSE42-NEXT: retq
;
; AVX1-LABEL: max_ge_v16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpmaxuw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: max_ge_v16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: max_ge_v16i16:
; AVX512: # BB#0:
; AVX512-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %1 = icmp uge <16 x i16> %a, %b
  %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b
  ret <16 x i16> %2
}

define <16 x i8> @max_ge_v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE-LABEL: max_ge_v16i8:
; SSE: # BB#0:
; SSE-NEXT: pmaxub %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: max_ge_v16i8:
; AVX: # BB#0:
; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = icmp uge <16 x i8> %a, %b
  %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b
  ret <16 x i8> %2
}

define <32 x i8> @max_ge_v32i8(<32 x i8> %a, <32 x i8> %b) {
; SSE-LABEL: max_ge_v32i8:
; SSE: # BB#0:
; SSE-NEXT: pmaxub %xmm2, %xmm0
; SSE-NEXT: pmaxub %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: max_ge_v32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpmaxub %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: max_ge_v32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: max_ge_v32i8:
; AVX512: # BB#0:
; AVX512-NEXT: vpmaxub %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
  %1 = icmp uge <32 x i8> %a, %b
  %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b
  ret <32 x i8> %2
}

;
; Unsigned Minimum (LT)
;

define <2 x i64> @min_lt_v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: min_lt_v2i64:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm2, %xmm3
; SSE2-NEXT: pxor %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm3, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT: pand %xmm5, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE2-NEXT: por %xmm2, %xmm3
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm3
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: min_lt_v2i64:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
; SSE41-NEXT: movdqa %xmm2, %xmm3
; SSE41-NEXT: pxor %xmm0, %xmm3
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: pcmpgtd %xmm3, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm5, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
; SSE41-NEXT: por %xmm3, %xmm0
; SSE41-NEXT: blendvpd %xmm2, %xmm1
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_lt_v2i64:
; SSE42: # BB#0:
; SSE42-NEXT: movdqa %xmm0, %xmm2
; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT: movdqa %xmm2, %xmm3
; SSE42-NEXT: pxor %xmm0, %xmm3
; SSE42-NEXT: pxor %xmm1, %xmm0
; SSE42-NEXT: pcmpgtq %xmm3, %xmm0
; SSE42-NEXT: blendvpd %xmm2, %xmm1
; SSE42-NEXT: movapd %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX-LABEL: min_lt_v2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm3
; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm2
; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = icmp ult <2 x i64> %a, %b
  %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %2
}

define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: min_lt_v4i64:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm1, %xmm5
; SSE2-NEXT: pxor %xmm4, %xmm5
; SSE2-NEXT: movdqa %xmm3, %xmm6
; SSE2-NEXT: pxor %xmm4, %xmm6
; SSE2-NEXT: movdqa %xmm6, %xmm7
; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
; SSE2-NEXT: pand %xmm8, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
; SSE2-NEXT: por %xmm5, %xmm6
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: pxor %xmm4, %xmm5
; SSE2-NEXT: pxor %xmm2, %xmm4
; SSE2-NEXT: movdqa %xmm4, %xmm7
; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm5, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE2-NEXT: pand %xmm8, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; SSE2-NEXT: por %xmm4, %xmm5
; SSE2-NEXT: pand %xmm5, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm5
; SSE2-NEXT: por %xmm5, %xmm0
; SSE2-NEXT: pand %xmm6, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm6
; SSE2-NEXT: por %xmm6, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: min_lt_v4i64:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa %xmm0, %xmm8
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
; SSE41-NEXT: movdqa %xmm1, %xmm5
; SSE41-NEXT: pxor %xmm0, %xmm5
; SSE41-NEXT: movdqa %xmm3, %xmm6
; SSE41-NEXT: pxor %xmm0, %xmm6
; SSE41-NEXT: movdqa %xmm6, %xmm7
; SSE41-NEXT: pcmpgtd %xmm5, %xmm7
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm5, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE41-NEXT: pand %xmm4, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; SSE41-NEXT: por %xmm6, %xmm5
; SSE41-NEXT: movdqa %xmm8, %xmm4
; SSE41-NEXT: pxor %xmm0, %xmm4
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm6
; SSE41-NEXT: pcmpgtd %xmm4, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSE41-NEXT: pcmpeqd %xmm4, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: pand %xmm7, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,1,3,3]
; SSE41-NEXT: por %xmm4, %xmm0
; SSE41-NEXT: blendvpd %xmm8, %xmm2
; SSE41-NEXT: movdqa %xmm5, %xmm0
; SSE41-NEXT: blendvpd %xmm1, %xmm3
; SSE41-NEXT: movapd %xmm2, %xmm0
; SSE41-NEXT: movapd %xmm3, %xmm1
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_lt_v4i64:
; SSE42: # BB#0:
; SSE42-NEXT: movdqa %xmm0, %xmm4
; SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
; SSE42-NEXT: movdqa %xmm1, %xmm6
; SSE42-NEXT: pxor %xmm0, %xmm6
; SSE42-NEXT: movdqa %xmm3, %xmm5
; SSE42-NEXT: pxor %xmm0, %xmm5
; SSE42-NEXT: pcmpgtq %xmm6, %xmm5
; SSE42-NEXT: movdqa %xmm4, %xmm6
; SSE42-NEXT: pxor %xmm0, %xmm6
; SSE42-NEXT: pxor %xmm2, %xmm0
; SSE42-NEXT: pcmpgtq %xmm6, %xmm0
; SSE42-NEXT: blendvpd %xmm4, %xmm2
; SSE42-NEXT: movdqa %xmm5, %xmm0
; SSE42-NEXT: blendvpd %xmm1, %xmm3
; SSE42-NEXT: movapd %xmm2, %xmm0
; SSE42-NEXT: movapd %xmm3, %xmm1
; SSE42-NEXT: retq
;
; AVX1-LABEL: min_lt_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm4
; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: min_lt_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm2
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: min_lt_v4i64:
; AVX512: # BB#0:
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm3
; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm2
; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX512-NEXT: retq
  %1 = icmp ult <4 x i64> %a, %b
  %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b
  ret <4 x i64> %2
}

define <4 x i32> @min_lt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: min_lt_v4i32:
; SSE2: # BB#0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm2, %xmm3
; SSE2-NEXT: pxor %xmm1, %xmm2
; SSE2-NEXT: pcmpgtd %xmm3, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm2
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: min_lt_v4i32:
; SSE41: # BB#0:
; SSE41-NEXT: pminud %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE42-LABEL: min_lt_v4i32:
; SSE42: # BB#0:
; SSE42-NEXT: pminud %xmm1, %xmm0
; SSE42-NEXT: retq
;
; AVX-LABEL: min_lt_v4i32:
; AVX: # BB#0:
; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %1 = icmp ult <4 x i32> %a, %b
%2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b 1097 ret <4 x i32> %2 1098} 1099 1100define <8 x i32> @min_lt_v8i32(<8 x i32> %a, <8 x i32> %b) { 1101; SSE2-LABEL: min_lt_v8i32: 1102; SSE2: # BB#0: 1103; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] 1104; SSE2-NEXT: movdqa %xmm1, %xmm5 1105; SSE2-NEXT: pxor %xmm4, %xmm5 1106; SSE2-NEXT: movdqa %xmm3, %xmm6 1107; SSE2-NEXT: pxor %xmm4, %xmm6 1108; SSE2-NEXT: pcmpgtd %xmm5, %xmm6 1109; SSE2-NEXT: movdqa %xmm0, %xmm5 1110; SSE2-NEXT: pxor %xmm4, %xmm5 1111; SSE2-NEXT: pxor %xmm2, %xmm4 1112; SSE2-NEXT: pcmpgtd %xmm5, %xmm4 1113; SSE2-NEXT: pand %xmm4, %xmm0 1114; SSE2-NEXT: pandn %xmm2, %xmm4 1115; SSE2-NEXT: por %xmm4, %xmm0 1116; SSE2-NEXT: pand %xmm6, %xmm1 1117; SSE2-NEXT: pandn %xmm3, %xmm6 1118; SSE2-NEXT: por %xmm6, %xmm1 1119; SSE2-NEXT: retq 1120; 1121; SSE41-LABEL: min_lt_v8i32: 1122; SSE41: # BB#0: 1123; SSE41-NEXT: pminud %xmm2, %xmm0 1124; SSE41-NEXT: pminud %xmm3, %xmm1 1125; SSE41-NEXT: retq 1126; 1127; SSE42-LABEL: min_lt_v8i32: 1128; SSE42: # BB#0: 1129; SSE42-NEXT: pminud %xmm2, %xmm0 1130; SSE42-NEXT: pminud %xmm3, %xmm1 1131; SSE42-NEXT: retq 1132; 1133; AVX1-LABEL: min_lt_v8i32: 1134; AVX1: # BB#0: 1135; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1136; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1137; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2 1138; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0 1139; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1140; AVX1-NEXT: retq 1141; 1142; AVX2-LABEL: min_lt_v8i32: 1143; AVX2: # BB#0: 1144; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0 1145; AVX2-NEXT: retq 1146; 1147; AVX512-LABEL: min_lt_v8i32: 1148; AVX512: # BB#0: 1149; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0 1150; AVX512-NEXT: retq 1151 %1 = icmp ult <8 x i32> %a, %b 1152 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b 1153 ret <8 x i32> %2 1154} 1155 1156define <8 x i16> @min_lt_v8i16(<8 x i16> %a, <8 x i16> %b) { 1157; SSE2-LABEL: min_lt_v8i16: 1158; SSE2: # BB#0: 1159; SSE2-NEXT: 
movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] 1160; SSE2-NEXT: movdqa %xmm0, %xmm3 1161; SSE2-NEXT: pxor %xmm2, %xmm3 1162; SSE2-NEXT: pxor %xmm1, %xmm2 1163; SSE2-NEXT: pcmpgtw %xmm3, %xmm2 1164; SSE2-NEXT: pand %xmm2, %xmm0 1165; SSE2-NEXT: pandn %xmm1, %xmm2 1166; SSE2-NEXT: por %xmm2, %xmm0 1167; SSE2-NEXT: retq 1168; 1169; SSE41-LABEL: min_lt_v8i16: 1170; SSE41: # BB#0: 1171; SSE41-NEXT: pminuw %xmm1, %xmm0 1172; SSE41-NEXT: retq 1173; 1174; SSE42-LABEL: min_lt_v8i16: 1175; SSE42: # BB#0: 1176; SSE42-NEXT: pminuw %xmm1, %xmm0 1177; SSE42-NEXT: retq 1178; 1179; AVX-LABEL: min_lt_v8i16: 1180; AVX: # BB#0: 1181; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 1182; AVX-NEXT: retq 1183 %1 = icmp ult <8 x i16> %a, %b 1184 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b 1185 ret <8 x i16> %2 1186} 1187 1188define <16 x i16> @min_lt_v16i16(<16 x i16> %a, <16 x i16> %b) { 1189; SSE2-LABEL: min_lt_v16i16: 1190; SSE2: # BB#0: 1191; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32768,32768,32768,32768,32768,32768,32768,32768] 1192; SSE2-NEXT: movdqa %xmm1, %xmm5 1193; SSE2-NEXT: pxor %xmm4, %xmm5 1194; SSE2-NEXT: movdqa %xmm3, %xmm6 1195; SSE2-NEXT: pxor %xmm4, %xmm6 1196; SSE2-NEXT: pcmpgtw %xmm5, %xmm6 1197; SSE2-NEXT: movdqa %xmm0, %xmm5 1198; SSE2-NEXT: pxor %xmm4, %xmm5 1199; SSE2-NEXT: pxor %xmm2, %xmm4 1200; SSE2-NEXT: pcmpgtw %xmm5, %xmm4 1201; SSE2-NEXT: pand %xmm4, %xmm0 1202; SSE2-NEXT: pandn %xmm2, %xmm4 1203; SSE2-NEXT: por %xmm4, %xmm0 1204; SSE2-NEXT: pand %xmm6, %xmm1 1205; SSE2-NEXT: pandn %xmm3, %xmm6 1206; SSE2-NEXT: por %xmm6, %xmm1 1207; SSE2-NEXT: retq 1208; 1209; SSE41-LABEL: min_lt_v16i16: 1210; SSE41: # BB#0: 1211; SSE41-NEXT: pminuw %xmm2, %xmm0 1212; SSE41-NEXT: pminuw %xmm3, %xmm1 1213; SSE41-NEXT: retq 1214; 1215; SSE42-LABEL: min_lt_v16i16: 1216; SSE42: # BB#0: 1217; SSE42-NEXT: pminuw %xmm2, %xmm0 1218; SSE42-NEXT: pminuw %xmm3, %xmm1 1219; SSE42-NEXT: retq 1220; 1221; AVX1-LABEL: min_lt_v16i16: 1222; AVX1: # BB#0: 1223; 
AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1224; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1225; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2 1226; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0 1227; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1228; AVX1-NEXT: retq 1229; 1230; AVX2-LABEL: min_lt_v16i16: 1231; AVX2: # BB#0: 1232; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0 1233; AVX2-NEXT: retq 1234; 1235; AVX512-LABEL: min_lt_v16i16: 1236; AVX512: # BB#0: 1237; AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0 1238; AVX512-NEXT: retq 1239 %1 = icmp ult <16 x i16> %a, %b 1240 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b 1241 ret <16 x i16> %2 1242} 1243 1244define <16 x i8> @min_lt_v16i8(<16 x i8> %a, <16 x i8> %b) { 1245; SSE-LABEL: min_lt_v16i8: 1246; SSE: # BB#0: 1247; SSE-NEXT: pminub %xmm1, %xmm0 1248; SSE-NEXT: retq 1249; 1250; AVX-LABEL: min_lt_v16i8: 1251; AVX: # BB#0: 1252; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 1253; AVX-NEXT: retq 1254 %1 = icmp ult <16 x i8> %a, %b 1255 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b 1256 ret <16 x i8> %2 1257} 1258 1259define <32 x i8> @min_lt_v32i8(<32 x i8> %a, <32 x i8> %b) { 1260; SSE-LABEL: min_lt_v32i8: 1261; SSE: # BB#0: 1262; SSE-NEXT: pminub %xmm2, %xmm0 1263; SSE-NEXT: pminub %xmm3, %xmm1 1264; SSE-NEXT: retq 1265; 1266; AVX1-LABEL: min_lt_v32i8: 1267; AVX1: # BB#0: 1268; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1269; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1270; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2 1271; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1272; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1273; AVX1-NEXT: retq 1274; 1275; AVX2-LABEL: min_lt_v32i8: 1276; AVX2: # BB#0: 1277; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0 1278; AVX2-NEXT: retq 1279; 1280; AVX512-LABEL: min_lt_v32i8: 1281; AVX512: # BB#0: 1282; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0 1283; AVX512-NEXT: retq 1284 %1 = icmp ult <32 x i8> %a, %b 1285 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b 1286 ret <32 x i8> %2 1287} 1288 1289; 1290; Unsigned 
Minimum (LE) 1291; 1292 1293define <2 x i64> @min_le_v2i64(<2 x i64> %a, <2 x i64> %b) { 1294; SSE2-LABEL: min_le_v2i64: 1295; SSE2: # BB#0: 1296; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] 1297; SSE2-NEXT: movdqa %xmm1, %xmm3 1298; SSE2-NEXT: pxor %xmm2, %xmm3 1299; SSE2-NEXT: pxor %xmm0, %xmm2 1300; SSE2-NEXT: movdqa %xmm2, %xmm4 1301; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 1302; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 1303; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 1304; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1305; SSE2-NEXT: pand %xmm5, %xmm2 1306; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 1307; SSE2-NEXT: por %xmm2, %xmm3 1308; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 1309; SSE2-NEXT: pxor %xmm3, %xmm2 1310; SSE2-NEXT: pandn %xmm0, %xmm3 1311; SSE2-NEXT: pandn %xmm1, %xmm2 1312; SSE2-NEXT: por %xmm3, %xmm2 1313; SSE2-NEXT: movdqa %xmm2, %xmm0 1314; SSE2-NEXT: retq 1315; 1316; SSE41-LABEL: min_le_v2i64: 1317; SSE41: # BB#0: 1318; SSE41-NEXT: movdqa %xmm0, %xmm2 1319; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648] 1320; SSE41-NEXT: movdqa %xmm1, %xmm3 1321; SSE41-NEXT: pxor %xmm0, %xmm3 1322; SSE41-NEXT: pxor %xmm2, %xmm0 1323; SSE41-NEXT: movdqa %xmm0, %xmm4 1324; SSE41-NEXT: pcmpgtd %xmm3, %xmm4 1325; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 1326; SSE41-NEXT: pcmpeqd %xmm3, %xmm0 1327; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1328; SSE41-NEXT: pand %xmm5, %xmm0 1329; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 1330; SSE41-NEXT: por %xmm0, %xmm3 1331; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 1332; SSE41-NEXT: pxor %xmm3, %xmm0 1333; SSE41-NEXT: blendvpd %xmm2, %xmm1 1334; SSE41-NEXT: movapd %xmm1, %xmm0 1335; SSE41-NEXT: retq 1336; 1337; SSE42-LABEL: min_le_v2i64: 1338; SSE42: # BB#0: 1339; SSE42-NEXT: movdqa %xmm0, %xmm2 1340; SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] 1341; SSE42-NEXT: movdqa %xmm1, %xmm0 1342; SSE42-NEXT: pxor %xmm3, %xmm0 1343; 
SSE42-NEXT: pxor %xmm2, %xmm3 1344; SSE42-NEXT: pcmpgtq %xmm0, %xmm3 1345; SSE42-NEXT: pcmpeqd %xmm0, %xmm0 1346; SSE42-NEXT: pxor %xmm3, %xmm0 1347; SSE42-NEXT: blendvpd %xmm2, %xmm1 1348; SSE42-NEXT: movapd %xmm1, %xmm0 1349; SSE42-NEXT: retq 1350; 1351; AVX-LABEL: min_le_v2i64: 1352; AVX: # BB#0: 1353; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] 1354; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm3 1355; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm2 1356; AVX-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 1357; AVX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 1358; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm2 1359; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1360; AVX-NEXT: retq 1361 %1 = icmp ule <2 x i64> %a, %b 1362 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b 1363 ret <2 x i64> %2 1364} 1365 1366define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) { 1367; SSE2-LABEL: min_le_v4i64: 1368; SSE2: # BB#0: 1369; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648] 1370; SSE2-NEXT: movdqa %xmm3, %xmm4 1371; SSE2-NEXT: pxor %xmm7, %xmm4 1372; SSE2-NEXT: movdqa %xmm1, %xmm5 1373; SSE2-NEXT: pxor %xmm7, %xmm5 1374; SSE2-NEXT: movdqa %xmm5, %xmm6 1375; SSE2-NEXT: pcmpgtd %xmm4, %xmm6 1376; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[0,0,2,2] 1377; SSE2-NEXT: pcmpeqd %xmm4, %xmm5 1378; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] 1379; SSE2-NEXT: pand %xmm8, %xmm4 1380; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm6[1,1,3,3] 1381; SSE2-NEXT: por %xmm4, %xmm8 1382; SSE2-NEXT: pcmpeqd %xmm4, %xmm4 1383; SSE2-NEXT: movdqa %xmm8, %xmm9 1384; SSE2-NEXT: pxor %xmm4, %xmm9 1385; SSE2-NEXT: movdqa %xmm2, %xmm6 1386; SSE2-NEXT: pxor %xmm7, %xmm6 1387; SSE2-NEXT: pxor %xmm0, %xmm7 1388; SSE2-NEXT: movdqa %xmm7, %xmm5 1389; SSE2-NEXT: pcmpgtd %xmm6, %xmm5 1390; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm5[0,0,2,2] 1391; SSE2-NEXT: pcmpeqd %xmm6, %xmm7 1392; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3] 1393; SSE2-NEXT: pand %xmm10, %xmm6 1394; SSE2-NEXT: pshufd {{.*#+}} 
xmm5 = xmm5[1,1,3,3] 1395; SSE2-NEXT: por %xmm6, %xmm5 1396; SSE2-NEXT: pxor %xmm5, %xmm4 1397; SSE2-NEXT: pandn %xmm0, %xmm5 1398; SSE2-NEXT: pandn %xmm2, %xmm4 1399; SSE2-NEXT: por %xmm5, %xmm4 1400; SSE2-NEXT: pandn %xmm1, %xmm8 1401; SSE2-NEXT: pandn %xmm3, %xmm9 1402; SSE2-NEXT: por %xmm8, %xmm9 1403; SSE2-NEXT: movdqa %xmm4, %xmm0 1404; SSE2-NEXT: movdqa %xmm9, %xmm1 1405; SSE2-NEXT: retq 1406; 1407; SSE41-LABEL: min_le_v4i64: 1408; SSE41: # BB#0: 1409; SSE41-NEXT: movdqa %xmm0, %xmm8 1410; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648] 1411; SSE41-NEXT: movdqa %xmm3, %xmm5 1412; SSE41-NEXT: pxor %xmm0, %xmm5 1413; SSE41-NEXT: movdqa %xmm1, %xmm6 1414; SSE41-NEXT: pxor %xmm0, %xmm6 1415; SSE41-NEXT: movdqa %xmm6, %xmm7 1416; SSE41-NEXT: pcmpgtd %xmm5, %xmm7 1417; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2] 1418; SSE41-NEXT: pcmpeqd %xmm5, %xmm6 1419; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] 1420; SSE41-NEXT: pand %xmm4, %xmm6 1421; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] 1422; SSE41-NEXT: por %xmm6, %xmm5 1423; SSE41-NEXT: pcmpeqd %xmm9, %xmm9 1424; SSE41-NEXT: pxor %xmm9, %xmm5 1425; SSE41-NEXT: movdqa %xmm2, %xmm6 1426; SSE41-NEXT: pxor %xmm0, %xmm6 1427; SSE41-NEXT: pxor %xmm8, %xmm0 1428; SSE41-NEXT: movdqa %xmm0, %xmm7 1429; SSE41-NEXT: pcmpgtd %xmm6, %xmm7 1430; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2] 1431; SSE41-NEXT: pcmpeqd %xmm6, %xmm0 1432; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3] 1433; SSE41-NEXT: pand %xmm4, %xmm6 1434; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3] 1435; SSE41-NEXT: por %xmm6, %xmm0 1436; SSE41-NEXT: pxor %xmm9, %xmm0 1437; SSE41-NEXT: blendvpd %xmm8, %xmm2 1438; SSE41-NEXT: movdqa %xmm5, %xmm0 1439; SSE41-NEXT: blendvpd %xmm1, %xmm3 1440; SSE41-NEXT: movapd %xmm2, %xmm0 1441; SSE41-NEXT: movapd %xmm3, %xmm1 1442; SSE41-NEXT: retq 1443; 1444; SSE42-LABEL: min_le_v4i64: 1445; SSE42: # BB#0: 1446; SSE42-NEXT: movdqa %xmm0, %xmm4 1447; SSE42-NEXT: movdqa 
{{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] 1448; SSE42-NEXT: movdqa %xmm3, %xmm6 1449; SSE42-NEXT: pxor %xmm0, %xmm6 1450; SSE42-NEXT: movdqa %xmm1, %xmm5 1451; SSE42-NEXT: pxor %xmm0, %xmm5 1452; SSE42-NEXT: pcmpgtq %xmm6, %xmm5 1453; SSE42-NEXT: pcmpeqd %xmm6, %xmm6 1454; SSE42-NEXT: pxor %xmm6, %xmm5 1455; SSE42-NEXT: movdqa %xmm2, %xmm7 1456; SSE42-NEXT: pxor %xmm0, %xmm7 1457; SSE42-NEXT: pxor %xmm4, %xmm0 1458; SSE42-NEXT: pcmpgtq %xmm7, %xmm0 1459; SSE42-NEXT: pxor %xmm6, %xmm0 1460; SSE42-NEXT: blendvpd %xmm4, %xmm2 1461; SSE42-NEXT: movdqa %xmm5, %xmm0 1462; SSE42-NEXT: blendvpd %xmm1, %xmm3 1463; SSE42-NEXT: movapd %xmm2, %xmm0 1464; SSE42-NEXT: movapd %xmm3, %xmm1 1465; SSE42-NEXT: retq 1466; 1467; AVX1-LABEL: min_le_v4i64: 1468; AVX1: # BB#0: 1469; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1470; AVX1-NEXT: vmovaps {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] 1471; AVX1-NEXT: vxorps %xmm3, %xmm2, %xmm2 1472; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 1473; AVX1-NEXT: vxorps %xmm3, %xmm4, %xmm4 1474; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2 1475; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 1476; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2 1477; AVX1-NEXT: vxorps %xmm3, %xmm1, %xmm5 1478; AVX1-NEXT: vxorps %xmm3, %xmm0, %xmm3 1479; AVX1-NEXT: vpcmpgtq %xmm5, %xmm3, %xmm3 1480; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3 1481; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 1482; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1483; AVX1-NEXT: retq 1484; 1485; AVX2-LABEL: min_le_v4i64: 1486; AVX2: # BB#0: 1487; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 1488; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3 1489; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2 1490; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 1491; AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 1492; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm2 1493; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1494; AVX2-NEXT: retq 1495; 1496; AVX512-LABEL: min_le_v4i64: 1497; AVX512: # BB#0: 1498; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), 
%ymm2 1499; AVX512-NEXT: vpxor %ymm2, %ymm1, %ymm3 1500; AVX512-NEXT: vpxor %ymm2, %ymm0, %ymm2 1501; AVX512-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 1502; AVX512-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 1503; AVX512-NEXT: vpxor %ymm3, %ymm2, %ymm2 1504; AVX512-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1505; AVX512-NEXT: retq 1506 %1 = icmp ule <4 x i64> %a, %b 1507 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b 1508 ret <4 x i64> %2 1509} 1510 1511define <4 x i32> @min_le_v4i32(<4 x i32> %a, <4 x i32> %b) { 1512; SSE2-LABEL: min_le_v4i32: 1513; SSE2: # BB#0: 1514; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648] 1515; SSE2-NEXT: movdqa %xmm1, %xmm2 1516; SSE2-NEXT: pxor %xmm3, %xmm2 1517; SSE2-NEXT: pxor %xmm0, %xmm3 1518; SSE2-NEXT: pcmpgtd %xmm2, %xmm3 1519; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 1520; SSE2-NEXT: pxor %xmm3, %xmm2 1521; SSE2-NEXT: pandn %xmm0, %xmm3 1522; SSE2-NEXT: pandn %xmm1, %xmm2 1523; SSE2-NEXT: por %xmm3, %xmm2 1524; SSE2-NEXT: movdqa %xmm2, %xmm0 1525; SSE2-NEXT: retq 1526; 1527; SSE41-LABEL: min_le_v4i32: 1528; SSE41: # BB#0: 1529; SSE41-NEXT: pminud %xmm1, %xmm0 1530; SSE41-NEXT: retq 1531; 1532; SSE42-LABEL: min_le_v4i32: 1533; SSE42: # BB#0: 1534; SSE42-NEXT: pminud %xmm1, %xmm0 1535; SSE42-NEXT: retq 1536; 1537; AVX-LABEL: min_le_v4i32: 1538; AVX: # BB#0: 1539; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0 1540; AVX-NEXT: retq 1541 %1 = icmp ule <4 x i32> %a, %b 1542 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b 1543 ret <4 x i32> %2 1544} 1545 1546define <8 x i32> @min_le_v8i32(<8 x i32> %a, <8 x i32> %b) { 1547; SSE2-LABEL: min_le_v8i32: 1548; SSE2: # BB#0: 1549; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648] 1550; SSE2-NEXT: movdqa %xmm3, %xmm4 1551; SSE2-NEXT: pxor %xmm6, %xmm4 1552; SSE2-NEXT: movdqa %xmm1, %xmm7 1553; SSE2-NEXT: pxor %xmm6, %xmm7 1554; SSE2-NEXT: pcmpgtd %xmm4, %xmm7 1555; SSE2-NEXT: pcmpeqd %xmm4, %xmm4 1556; SSE2-NEXT: movdqa %xmm7, %xmm5 1557; SSE2-NEXT: pxor 
%xmm4, %xmm5 1558; SSE2-NEXT: movdqa %xmm2, %xmm8 1559; SSE2-NEXT: pxor %xmm6, %xmm8 1560; SSE2-NEXT: pxor %xmm0, %xmm6 1561; SSE2-NEXT: pcmpgtd %xmm8, %xmm6 1562; SSE2-NEXT: pxor %xmm6, %xmm4 1563; SSE2-NEXT: pandn %xmm0, %xmm6 1564; SSE2-NEXT: pandn %xmm2, %xmm4 1565; SSE2-NEXT: por %xmm6, %xmm4 1566; SSE2-NEXT: pandn %xmm1, %xmm7 1567; SSE2-NEXT: pandn %xmm3, %xmm5 1568; SSE2-NEXT: por %xmm7, %xmm5 1569; SSE2-NEXT: movdqa %xmm4, %xmm0 1570; SSE2-NEXT: movdqa %xmm5, %xmm1 1571; SSE2-NEXT: retq 1572; 1573; SSE41-LABEL: min_le_v8i32: 1574; SSE41: # BB#0: 1575; SSE41-NEXT: pminud %xmm2, %xmm0 1576; SSE41-NEXT: pminud %xmm3, %xmm1 1577; SSE41-NEXT: retq 1578; 1579; SSE42-LABEL: min_le_v8i32: 1580; SSE42: # BB#0: 1581; SSE42-NEXT: pminud %xmm2, %xmm0 1582; SSE42-NEXT: pminud %xmm3, %xmm1 1583; SSE42-NEXT: retq 1584; 1585; AVX1-LABEL: min_le_v8i32: 1586; AVX1: # BB#0: 1587; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1588; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1589; AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2 1590; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0 1591; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1592; AVX1-NEXT: retq 1593; 1594; AVX2-LABEL: min_le_v8i32: 1595; AVX2: # BB#0: 1596; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0 1597; AVX2-NEXT: retq 1598; 1599; AVX512-LABEL: min_le_v8i32: 1600; AVX512: # BB#0: 1601; AVX512-NEXT: vpminud %ymm1, %ymm0, %ymm0 1602; AVX512-NEXT: retq 1603 %1 = icmp ule <8 x i32> %a, %b 1604 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b 1605 ret <8 x i32> %2 1606} 1607 1608define <8 x i16> @min_le_v8i16(<8 x i16> %a, <8 x i16> %b) { 1609; SSE2-LABEL: min_le_v8i16: 1610; SSE2: # BB#0: 1611; SSE2-NEXT: movdqa %xmm0, %xmm2 1612; SSE2-NEXT: psubusw %xmm1, %xmm2 1613; SSE2-NEXT: pxor %xmm3, %xmm3 1614; SSE2-NEXT: pcmpeqw %xmm2, %xmm3 1615; SSE2-NEXT: pand %xmm3, %xmm0 1616; SSE2-NEXT: pandn %xmm1, %xmm3 1617; SSE2-NEXT: por %xmm3, %xmm0 1618; SSE2-NEXT: retq 1619; 1620; SSE41-LABEL: min_le_v8i16: 1621; SSE41: # BB#0: 1622; SSE41-NEXT: pminuw 
%xmm1, %xmm0 1623; SSE41-NEXT: retq 1624; 1625; SSE42-LABEL: min_le_v8i16: 1626; SSE42: # BB#0: 1627; SSE42-NEXT: pminuw %xmm1, %xmm0 1628; SSE42-NEXT: retq 1629; 1630; AVX-LABEL: min_le_v8i16: 1631; AVX: # BB#0: 1632; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 1633; AVX-NEXT: retq 1634 %1 = icmp ule <8 x i16> %a, %b 1635 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b 1636 ret <8 x i16> %2 1637} 1638 1639define <16 x i16> @min_le_v16i16(<16 x i16> %a, <16 x i16> %b) { 1640; SSE2-LABEL: min_le_v16i16: 1641; SSE2: # BB#0: 1642; SSE2-NEXT: movdqa %xmm1, %xmm4 1643; SSE2-NEXT: psubusw %xmm3, %xmm4 1644; SSE2-NEXT: pxor %xmm6, %xmm6 1645; SSE2-NEXT: pcmpeqw %xmm6, %xmm4 1646; SSE2-NEXT: movdqa %xmm0, %xmm5 1647; SSE2-NEXT: psubusw %xmm2, %xmm5 1648; SSE2-NEXT: pcmpeqw %xmm6, %xmm5 1649; SSE2-NEXT: pand %xmm5, %xmm0 1650; SSE2-NEXT: pandn %xmm2, %xmm5 1651; SSE2-NEXT: por %xmm0, %xmm5 1652; SSE2-NEXT: pand %xmm4, %xmm1 1653; SSE2-NEXT: pandn %xmm3, %xmm4 1654; SSE2-NEXT: por %xmm1, %xmm4 1655; SSE2-NEXT: movdqa %xmm5, %xmm0 1656; SSE2-NEXT: movdqa %xmm4, %xmm1 1657; SSE2-NEXT: retq 1658; 1659; SSE41-LABEL: min_le_v16i16: 1660; SSE41: # BB#0: 1661; SSE41-NEXT: pminuw %xmm2, %xmm0 1662; SSE41-NEXT: pminuw %xmm3, %xmm1 1663; SSE41-NEXT: retq 1664; 1665; SSE42-LABEL: min_le_v16i16: 1666; SSE42: # BB#0: 1667; SSE42-NEXT: pminuw %xmm2, %xmm0 1668; SSE42-NEXT: pminuw %xmm3, %xmm1 1669; SSE42-NEXT: retq 1670; 1671; AVX1-LABEL: min_le_v16i16: 1672; AVX1: # BB#0: 1673; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1674; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1675; AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2 1676; AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0 1677; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1678; AVX1-NEXT: retq 1679; 1680; AVX2-LABEL: min_le_v16i16: 1681; AVX2: # BB#0: 1682; AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0 1683; AVX2-NEXT: retq 1684; 1685; AVX512-LABEL: min_le_v16i16: 1686; AVX512: # BB#0: 1687; AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0 1688; AVX512-NEXT: retq 1689 %1 
= icmp ule <16 x i16> %a, %b 1690 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b 1691 ret <16 x i16> %2 1692} 1693 1694define <16 x i8> @min_le_v16i8(<16 x i8> %a, <16 x i8> %b) { 1695; SSE-LABEL: min_le_v16i8: 1696; SSE: # BB#0: 1697; SSE-NEXT: pminub %xmm1, %xmm0 1698; SSE-NEXT: retq 1699; 1700; AVX-LABEL: min_le_v16i8: 1701; AVX: # BB#0: 1702; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 1703; AVX-NEXT: retq 1704 %1 = icmp ule <16 x i8> %a, %b 1705 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b 1706 ret <16 x i8> %2 1707} 1708 1709define <32 x i8> @min_le_v32i8(<32 x i8> %a, <32 x i8> %b) { 1710; SSE-LABEL: min_le_v32i8: 1711; SSE: # BB#0: 1712; SSE-NEXT: pminub %xmm2, %xmm0 1713; SSE-NEXT: pminub %xmm3, %xmm1 1714; SSE-NEXT: retq 1715; 1716; AVX1-LABEL: min_le_v32i8: 1717; AVX1: # BB#0: 1718; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1719; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1720; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2 1721; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 1722; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1723; AVX1-NEXT: retq 1724; 1725; AVX2-LABEL: min_le_v32i8: 1726; AVX2: # BB#0: 1727; AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0 1728; AVX2-NEXT: retq 1729; 1730; AVX512-LABEL: min_le_v32i8: 1731; AVX512: # BB#0: 1732; AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0 1733; AVX512-NEXT: retq 1734 %1 = icmp ule <32 x i8> %a, %b 1735 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b 1736 ret <32 x i8> %2 1737} 1738 1739; 1740; Constant Folding 1741; 1742 1743define <2 x i64> @max_gt_v2i64c() { 1744; SSE-LABEL: max_gt_v2i64c: 1745; SSE: # BB#0: 1746; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,7] 1747; SSE-NEXT: retq 1748; 1749; AVX-LABEL: max_gt_v2i64c: 1750; AVX: # BB#0: 1751; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551615,7] 1752; AVX-NEXT: retq 1753 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0 1754 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0 1755 %3 = icmp ugt <2 x i64> %1, %2 1756 %4 = select <2 x 
i1> %3, <2 x i64> %1, <2 x i64> %2 1757 ret <2 x i64> %4 1758} 1759 1760define <4 x i64> @max_gt_v4i64c() { 1761; SSE-LABEL: max_gt_v4i64c: 1762; SSE: # BB#0: 1763; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,7] 1764; SSE-NEXT: pcmpeqd %xmm0, %xmm0 1765; SSE-NEXT: retq 1766; 1767; AVX-LABEL: max_gt_v4i64c: 1768; AVX: # BB#0: 1769; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7] 1770; AVX-NEXT: retq 1771 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0 1772 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0 1773 %3 = icmp ugt <4 x i64> %1, %2 1774 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2 1775 ret <4 x i64> %4 1776} 1777 1778define <4 x i32> @max_gt_v4i32c() { 1779; SSE-LABEL: max_gt_v4i32c: 1780; SSE: # BB#0: 1781; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] 1782; SSE-NEXT: retq 1783; 1784; AVX-LABEL: max_gt_v4i32c: 1785; AVX: # BB#0: 1786; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] 1787; AVX-NEXT: retq 1788 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0 1789 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0 1790 %3 = icmp ugt <4 x i32> %1, %2 1791 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 1792 ret <4 x i32> %4 1793} 1794 1795define <8 x i32> @max_gt_v8i32c() { 1796; SSE-LABEL: max_gt_v8i32c: 1797; SSE: # BB#0: 1798; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295] 1799; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7] 1800; SSE-NEXT: retq 1801; 1802; AVX-LABEL: max_gt_v8i32c: 1803; AVX: # BB#0: 1804; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7] 1805; AVX-NEXT: retq 1806 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0 1807 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0 1808 %3 = icmp ugt <8 x i32> %1, %2 
1809 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 1810 ret <8 x i32> %4 1811} 1812 1813define <8 x i16> @max_gt_v8i16c() { 1814; SSE-LABEL: max_gt_v8i16c: 1815; SSE: # BB#0: 1816; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] 1817; SSE-NEXT: retq 1818; 1819; AVX-LABEL: max_gt_v8i16c: 1820; AVX: # BB#0: 1821; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] 1822; AVX-NEXT: retq 1823 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0 1824 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0 1825 %3 = icmp ugt <8 x i16> %1, %2 1826 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 1827 ret <8 x i16> %4 1828} 1829 1830define <16 x i16> @max_gt_v16i16c() { 1831; SSE-LABEL: max_gt_v16i16c: 1832; SSE: # BB#0: 1833; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0] 1834; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8] 1835; SSE-NEXT: retq 1836; 1837; AVX-LABEL: max_gt_v16i16c: 1838; AVX: # BB#0: 1839; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8] 1840; AVX-NEXT: retq 1841 %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0 1842 %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0 1843 %3 = icmp ugt <16 x i16> %1, %2 1844 %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2 1845 ret <16 x i16> %4 1846} 1847 1848define <16 x i8> @max_gt_v16i8c() { 1849; SSE-LABEL: max_gt_v16i8c: 1850; SSE: # BB#0: 1851; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] 1852; SSE-NEXT: retq 1853; 1854; AVX-LABEL: max_gt_v16i8c: 1855; AVX: # BB#0: 1856; AVX-NEXT: vmovaps {{.*#+}} xmm0 = 
[255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8] 1857; AVX-NEXT: retq 1858 %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0 1859 %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0 1860 %3 = icmp ugt <16 x i8> %1, %2 1861 %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 1862 ret <16 x i8> %4 1863} 1864 1865define <2 x i64> @max_ge_v2i64c() { 1866; SSE-LABEL: max_ge_v2i64c: 1867; SSE: # BB#0: 1868; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,7] 1869; SSE-NEXT: retq 1870; 1871; AVX-LABEL: max_ge_v2i64c: 1872; AVX: # BB#0: 1873; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551615,7] 1874; AVX-NEXT: retq 1875 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0 1876 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0 1877 %3 = icmp uge <2 x i64> %1, %2 1878 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 1879 ret <2 x i64> %4 1880} 1881 1882define <4 x i64> @max_ge_v4i64c() { 1883; SSE-LABEL: max_ge_v4i64c: 1884; SSE: # BB#0: 1885; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,7] 1886; SSE-NEXT: pcmpeqd %xmm0, %xmm0 1887; SSE-NEXT: retq 1888; 1889; AVX-LABEL: max_ge_v4i64c: 1890; AVX: # BB#0: 1891; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7] 1892; AVX-NEXT: retq 1893 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0 1894 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0 1895 %3 = icmp uge <4 x i64> %1, %2 1896 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2 1897 ret <4 x i64> %4 1898} 1899 1900define <4 x i32> @max_ge_v4i32c() { 1901; SSE-LABEL: max_ge_v4i32c: 1902; SSE: # BB#0: 1903; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] 1904; SSE-NEXT: retq 1905; 1906; AVX-LABEL: max_ge_v4i32c: 1907; AVX: # BB#0: 1908; AVX-NEXT: vmovaps {{.*#+}} xmm0 = 
[4294967295,4294967295,7,7]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
  %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
  %3 = icmp uge <4 x i32> %1, %2
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  ret <4 x i32> %4
}

; Constant-folding tests: each function builds two constant vectors via
; insertelement, compares them unsigned, and selects; the backend must fold
; the whole sequence to constant-pool loads (checked by the movaps constants).

define <8 x i32> @max_ge_v8i32c() {
; SSE-LABEL: max_ge_v8i32c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [7,5,5,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_ge_v8i32c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
  %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
  %3 = icmp uge <8 x i32> %1, %2
  %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
  ret <8 x i32> %4
}

define <8 x i16> @max_ge_v8i16c() {
; SSE-LABEL: max_ge_v8i16c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_ge_v8i16c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
  %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
  %3 = icmp uge <8 x i16> %1, %2
  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
  ret <8 x i16> %4
}

define <16 x i16> @max_ge_v16i16c() {
; SSE-LABEL: max_ge_v16i16c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_ge_v16i16c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
; AVX-NEXT:    retq
  %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
  %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
  %3 = icmp uge <16 x i16> %1, %2
  %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
  ret <16 x i16> %4
}

define <16 x i8> @max_ge_v16i8c() {
; SSE-LABEL: max_ge_v16i8c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: max_ge_v16i8c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; AVX-NEXT:    retq
  %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
  %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
  %3 = icmp uge <16 x i8> %1, %2
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
  ret <16 x i8> %4
}

define <2 x i64> @min_lt_v2i64c() {
; SSE-LABEL: min_lt_v2i64c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551609,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v2i64c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
; AVX-NEXT:    retq
  %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
  %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
  %3 = icmp ult <2 x i64> %1, %2
  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
  ret <2 x i64> %4
}

define <4 x i64> @min_lt_v4i64c() {
; SSE-LABEL: min_lt_v4i64c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v4i64c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
  %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
  %3 = icmp ult <4 x i64> %1, %2
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  ret <4 x i64> %4
}

define <4 x i32> @min_lt_v4i32c() {
; SSE-LABEL: min_lt_v4i32c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v4i32c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
  %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
  %3 = icmp ult <4 x i32> %1, %2
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  ret <4 x i32> %4
}

define <8 x i32> @min_lt_v8i32c() {
; SSE-LABEL: min_lt_v8i32c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v8i32c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
  %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
  %3 = icmp ult <8 x i32> %1, %2
  %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
  ret <8 x i32> %4
}

define <8 x i16> @min_lt_v8i16c() {
; SSE-LABEL: min_lt_v8i16c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,65531,65531,65529,1,3,3,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v8i16c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,65531,65531,65529,1,3,3,1]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
  %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 1, i32 0
  %3 = icmp ult <8 x i16> %1, %2
  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
  ret <8 x i16> %4
}

define <16 x i16> @min_lt_v16i16c() {
; SSE-LABEL: min_lt_v16i16c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,65530,65531,65532,65531,65530,65529,0]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v16i16c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
; AVX-NEXT:    retq
  %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
  %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 1, i32 0
  %3 = icmp ult <16 x i16> %1, %2
  %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
  ret <16 x i16> %4
}

define <16 x i8> @min_lt_v16i8c() {
; SSE-LABEL: min_lt_v16i8c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_lt_v16i8c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; AVX-NEXT:    retq
  %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
  %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 1, i32 0
  %3 = icmp ult <16 x i8> %1, %2
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
  ret <16 x i8> %4
}

define <2 x i64> @min_le_v2i64c() {
; SSE-LABEL: min_le_v2i64c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551609,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v2i64c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
; AVX-NEXT:    retq
  %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
  %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
  %3 = icmp ule <2 x i64> %1, %2
  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
  ret <2 x i64> %4
}

define <4 x i64> @min_le_v4i64c() {
; SSE-LABEL: min_le_v4i64c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v4i64c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
  %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
  %3 = icmp ule <4 x i64> %1, %2
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  ret <4 x i64> %4
}

define <4 x i32> @min_le_v4i32c() {
; SSE-LABEL: min_le_v4i32c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v4i32c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
  %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
  %3 = icmp ule <4 x i32> %1, %2
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  ret <4 x i32> %4
}

define <8 x i32> @min_le_v8i32c() {
; SSE-LABEL: min_le_v8i32c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v8i32c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
  %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
  %3 = icmp ule <8 x i32> %1, %2
  %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
  ret <8 x i32> %4
}

define <8 x i16> @min_le_v8i16c() {
; SSE-LABEL: min_le_v8i16c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v8i16c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
  %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
  %3 = icmp ule <8 x i16> %1, %2
  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
  ret <8 x i16> %4
}

define <16 x i16> @min_le_v16i16c() {
; SSE-LABEL: min_le_v16i16c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v16i16c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
; AVX-NEXT:    retq
  %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
  %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
  %3 = icmp ule <16 x i16> %1, %2
  %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
  ret <16 x i16> %4
}

define <16 x i8> @min_le_v16i8c() {
; SSE-LABEL: min_le_v16i8c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: min_le_v16i8c:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; AVX-NEXT:    retq
  %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
  %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
  %3 = icmp ule <16 x i8> %1, %2
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
  ret <16 x i8> %4
}