1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512 9 10; 11; Signed Maximum (GT) 12; 13 14define <2 x i64> @max_gt_v2i64(<2 x i64> %a, <2 x i64> %b) { 15; SSE2-LABEL: max_gt_v2i64: 16; SSE2: # %bb.0: 17; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 18; SSE2-NEXT: movdqa %xmm1, %xmm3 19; SSE2-NEXT: pxor %xmm2, %xmm3 20; SSE2-NEXT: pxor %xmm0, %xmm2 21; SSE2-NEXT: movdqa %xmm2, %xmm4 22; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 23; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 24; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 25; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 26; SSE2-NEXT: pand %xmm5, %xmm2 27; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 28; SSE2-NEXT: por %xmm2, %xmm3 29; SSE2-NEXT: pand %xmm3, %xmm0 30; SSE2-NEXT: pandn %xmm1, %xmm3 31; SSE2-NEXT: por %xmm3, %xmm0 32; SSE2-NEXT: retq 33; 34; SSE41-LABEL: max_gt_v2i64: 35; SSE41: # %bb.0: 36; SSE41-NEXT: movdqa %xmm0, %xmm2 37; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] 38; SSE41-NEXT: movdqa %xmm1, %xmm0 39; SSE41-NEXT: pxor %xmm3, %xmm0 40; SSE41-NEXT: pxor %xmm2, %xmm3 41; SSE41-NEXT: movdqa %xmm3, %xmm4 42; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 43; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 44; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 45; SSE41-NEXT: pand %xmm4, %xmm0 46; SSE41-NEXT: por %xmm3, %xmm0 47; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 48; SSE41-NEXT: movapd %xmm1, %xmm0 49; SSE41-NEXT: retq 50; 51; SSE42-LABEL: max_gt_v2i64: 52; SSE42: # %bb.0: 53; SSE42-NEXT: movdqa %xmm0, %xmm2 54; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 55; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1 56; SSE42-NEXT: movapd %xmm1, %xmm0 57; SSE42-NEXT: retq 58; 59; AVX1-LABEL: max_gt_v2i64: 60; AVX1: # %bb.0: 61; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 62; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 63; AVX1-NEXT: retq 64; 65; AVX2-LABEL: max_gt_v2i64: 66; AVX2: # %bb.0: 67; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 68; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 69; AVX2-NEXT: retq 70; 71; AVX512-LABEL: max_gt_v2i64: 72; AVX512: # %bb.0: 73; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 74; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 75; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 76; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 77; AVX512-NEXT: vzeroupper 78; AVX512-NEXT: retq 79 %1 = icmp sgt <2 x i64> %a, %b 80 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b 81 ret <2 x i64> %2 82} 83 84define <4 x i64> @max_gt_v4i64(<4 x i64> %a, <4 x i64> %b) { 85; SSE2-LABEL: max_gt_v4i64: 86; SSE2: # %bb.0: 87; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] 88; SSE2-NEXT: movdqa %xmm2, %xmm5 89; SSE2-NEXT: pxor %xmm4, %xmm5 90; SSE2-NEXT: movdqa %xmm0, %xmm6 91; SSE2-NEXT: pxor %xmm4, %xmm6 92; SSE2-NEXT: movdqa %xmm6, %xmm7 93; SSE2-NEXT: pcmpgtd %xmm5, 
%xmm7 94; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] 95; SSE2-NEXT: pcmpeqd %xmm5, %xmm6 96; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3] 97; SSE2-NEXT: pand %xmm8, %xmm5 98; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3] 99; SSE2-NEXT: por %xmm5, %xmm6 100; SSE2-NEXT: pand %xmm6, %xmm0 101; SSE2-NEXT: pandn %xmm2, %xmm6 102; SSE2-NEXT: por %xmm6, %xmm0 103; SSE2-NEXT: movdqa %xmm3, %xmm2 104; SSE2-NEXT: pxor %xmm4, %xmm2 105; SSE2-NEXT: pxor %xmm1, %xmm4 106; SSE2-NEXT: movdqa %xmm4, %xmm5 107; SSE2-NEXT: pcmpgtd %xmm2, %xmm5 108; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 109; SSE2-NEXT: pcmpeqd %xmm2, %xmm4 110; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 111; SSE2-NEXT: pand %xmm6, %xmm2 112; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] 113; SSE2-NEXT: por %xmm2, %xmm4 114; SSE2-NEXT: pand %xmm4, %xmm1 115; SSE2-NEXT: pandn %xmm3, %xmm4 116; SSE2-NEXT: por %xmm4, %xmm1 117; SSE2-NEXT: retq 118; 119; SSE41-LABEL: max_gt_v4i64: 120; SSE41: # %bb.0: 121; SSE41-NEXT: movdqa %xmm0, %xmm4 122; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648] 123; SSE41-NEXT: movdqa %xmm2, %xmm6 124; SSE41-NEXT: pxor %xmm5, %xmm6 125; SSE41-NEXT: movdqa %xmm0, %xmm7 126; SSE41-NEXT: pxor %xmm5, %xmm7 127; SSE41-NEXT: movdqa %xmm7, %xmm0 128; SSE41-NEXT: pcmpeqd %xmm6, %xmm0 129; SSE41-NEXT: pcmpgtd %xmm6, %xmm7 130; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2] 131; SSE41-NEXT: pand %xmm6, %xmm0 132; SSE41-NEXT: por %xmm7, %xmm0 133; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 134; SSE41-NEXT: movdqa %xmm3, %xmm0 135; SSE41-NEXT: pxor %xmm5, %xmm0 136; SSE41-NEXT: pxor %xmm1, %xmm5 137; SSE41-NEXT: movdqa %xmm5, %xmm4 138; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 139; SSE41-NEXT: pcmpgtd %xmm0, %xmm5 140; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] 141; SSE41-NEXT: pand %xmm4, %xmm0 142; SSE41-NEXT: por %xmm5, %xmm0 143; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3 144; SSE41-NEXT: movapd %xmm2, %xmm0 145; SSE41-NEXT: movapd %xmm3, %xmm1 146; SSE41-NEXT: retq 147; 148; SSE42-LABEL: max_gt_v4i64: 149; SSE42: # %bb.0: 150; SSE42-NEXT: movdqa %xmm0, %xmm4 151; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 152; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 153; SSE42-NEXT: movdqa %xmm1, %xmm0 154; SSE42-NEXT: pcmpgtq %xmm3, %xmm0 155; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 156; SSE42-NEXT: movapd %xmm2, %xmm0 157; SSE42-NEXT: movapd %xmm3, %xmm1 158; SSE42-NEXT: retq 159; 160; AVX1-LABEL: max_gt_v4i64: 161; AVX1: # %bb.0: 162; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 163; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 164; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 165; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 166; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 167; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 168; AVX1-NEXT: retq 169; 170; AVX2-LABEL: max_gt_v4i64: 171; AVX2: # %bb.0: 172; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 173; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 174; AVX2-NEXT: retq 175; 176; AVX512-LABEL: max_gt_v4i64: 177; AVX512: # %bb.0: 178; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 179; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 180; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 181; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 182; AVX512-NEXT: retq 183 %1 = icmp sgt <4 x i64> %a, %b 184 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b 185 ret <4 x i64> %2 186} 187 188define <4 x i32> @max_gt_v4i32(<4 x i32> %a, <4 x i32> %b) { 189; SSE2-LABEL: max_gt_v4i32: 190; SSE2: # %bb.0: 191; SSE2-NEXT: movdqa %xmm0, %xmm2 192; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 
193; SSE2-NEXT: pand %xmm2, %xmm0 194; SSE2-NEXT: pandn %xmm1, %xmm2 195; SSE2-NEXT: por %xmm0, %xmm2 196; SSE2-NEXT: movdqa %xmm2, %xmm0 197; SSE2-NEXT: retq 198; 199; SSE41-LABEL: max_gt_v4i32: 200; SSE41: # %bb.0: 201; SSE41-NEXT: pmaxsd %xmm1, %xmm0 202; SSE41-NEXT: retq 203; 204; SSE42-LABEL: max_gt_v4i32: 205; SSE42: # %bb.0: 206; SSE42-NEXT: pmaxsd %xmm1, %xmm0 207; SSE42-NEXT: retq 208; 209; AVX-LABEL: max_gt_v4i32: 210; AVX: # %bb.0: 211; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 212; AVX-NEXT: retq 213 %1 = icmp sgt <4 x i32> %a, %b 214 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b 215 ret <4 x i32> %2 216} 217 218define <8 x i32> @max_gt_v8i32(<8 x i32> %a, <8 x i32> %b) { 219; SSE2-LABEL: max_gt_v8i32: 220; SSE2: # %bb.0: 221; SSE2-NEXT: movdqa %xmm0, %xmm4 222; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 223; SSE2-NEXT: pand %xmm4, %xmm0 224; SSE2-NEXT: pandn %xmm2, %xmm4 225; SSE2-NEXT: por %xmm0, %xmm4 226; SSE2-NEXT: movdqa %xmm1, %xmm2 227; SSE2-NEXT: pcmpgtd %xmm3, %xmm2 228; SSE2-NEXT: pand %xmm2, %xmm1 229; SSE2-NEXT: pandn %xmm3, %xmm2 230; SSE2-NEXT: por %xmm1, %xmm2 231; SSE2-NEXT: movdqa %xmm4, %xmm0 232; SSE2-NEXT: movdqa %xmm2, %xmm1 233; SSE2-NEXT: retq 234; 235; SSE41-LABEL: max_gt_v8i32: 236; SSE41: # %bb.0: 237; SSE41-NEXT: pmaxsd %xmm2, %xmm0 238; SSE41-NEXT: pmaxsd %xmm3, %xmm1 239; SSE41-NEXT: retq 240; 241; SSE42-LABEL: max_gt_v8i32: 242; SSE42: # %bb.0: 243; SSE42-NEXT: pmaxsd %xmm2, %xmm0 244; SSE42-NEXT: pmaxsd %xmm3, %xmm1 245; SSE42-NEXT: retq 246; 247; AVX1-LABEL: max_gt_v8i32: 248; AVX1: # %bb.0: 249; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 250; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 251; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2 252; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 253; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 254; AVX1-NEXT: retq 255; 256; AVX2-LABEL: max_gt_v8i32: 257; AVX2: # %bb.0: 258; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 259; AVX2-NEXT: retq 260; 261; AVX512-LABEL: max_gt_v8i32: 262; AVX512: # %bb.0: 263; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 264; AVX512-NEXT: retq 265 %1 = icmp sgt <8 x i32> %a, %b 266 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b 267 ret <8 x i32> %2 268} 269 270define <8 x i16> @max_gt_v8i16(<8 x i16> %a, <8 x i16> %b) { 271; SSE-LABEL: max_gt_v8i16: 272; SSE: # %bb.0: 273; SSE-NEXT: pmaxsw %xmm1, %xmm0 274; SSE-NEXT: retq 275; 276; AVX-LABEL: max_gt_v8i16: 277; AVX: # %bb.0: 278; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 279; AVX-NEXT: retq 280 %1 = icmp sgt <8 x i16> %a, %b 281 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b 282 ret <8 x i16> %2 283} 284 285define <16 x i16> @max_gt_v16i16(<16 x i16> %a, <16 x i16> %b) { 286; SSE-LABEL: max_gt_v16i16: 287; SSE: # %bb.0: 288; SSE-NEXT: pmaxsw %xmm2, %xmm0 289; SSE-NEXT: pmaxsw %xmm3, %xmm1 290; SSE-NEXT: retq 291; 292; AVX1-LABEL: max_gt_v16i16: 293; AVX1: # %bb.0: 294; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 295; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 296; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2 297; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 298; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 299; AVX1-NEXT: retq 300; 301; AVX2-LABEL: max_gt_v16i16: 302; AVX2: # %bb.0: 303; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 304; AVX2-NEXT: retq 305; 306; AVX512-LABEL: max_gt_v16i16: 307; AVX512: # %bb.0: 308; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 309; AVX512-NEXT: retq 310 %1 = icmp sgt <16 x i16> %a, %b 311 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b 312 ret <16 x i16> %2 313} 314 315define <16 x i8> @max_gt_v16i8(<16 x i8> %a, <16 x i8> %b) { 316; 
SSE2-LABEL: max_gt_v16i8: 317; SSE2: # %bb.0: 318; SSE2-NEXT: movdqa %xmm0, %xmm2 319; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 320; SSE2-NEXT: pand %xmm2, %xmm0 321; SSE2-NEXT: pandn %xmm1, %xmm2 322; SSE2-NEXT: por %xmm0, %xmm2 323; SSE2-NEXT: movdqa %xmm2, %xmm0 324; SSE2-NEXT: retq 325; 326; SSE41-LABEL: max_gt_v16i8: 327; SSE41: # %bb.0: 328; SSE41-NEXT: pmaxsb %xmm1, %xmm0 329; SSE41-NEXT: retq 330; 331; SSE42-LABEL: max_gt_v16i8: 332; SSE42: # %bb.0: 333; SSE42-NEXT: pmaxsb %xmm1, %xmm0 334; SSE42-NEXT: retq 335; 336; AVX-LABEL: max_gt_v16i8: 337; AVX: # %bb.0: 338; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 339; AVX-NEXT: retq 340 %1 = icmp sgt <16 x i8> %a, %b 341 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b 342 ret <16 x i8> %2 343} 344 345define <32 x i8> @max_gt_v32i8(<32 x i8> %a, <32 x i8> %b) { 346; SSE2-LABEL: max_gt_v32i8: 347; SSE2: # %bb.0: 348; SSE2-NEXT: movdqa %xmm0, %xmm4 349; SSE2-NEXT: pcmpgtb %xmm2, %xmm4 350; SSE2-NEXT: pand %xmm4, %xmm0 351; SSE2-NEXT: pandn %xmm2, %xmm4 352; SSE2-NEXT: por %xmm0, %xmm4 353; SSE2-NEXT: movdqa %xmm1, %xmm2 354; SSE2-NEXT: pcmpgtb %xmm3, %xmm2 355; SSE2-NEXT: pand %xmm2, %xmm1 356; SSE2-NEXT: pandn %xmm3, %xmm2 357; SSE2-NEXT: por %xmm1, %xmm2 358; SSE2-NEXT: movdqa %xmm4, %xmm0 359; SSE2-NEXT: movdqa %xmm2, %xmm1 360; SSE2-NEXT: retq 361; 362; SSE41-LABEL: max_gt_v32i8: 363; SSE41: # %bb.0: 364; SSE41-NEXT: pmaxsb %xmm2, %xmm0 365; SSE41-NEXT: pmaxsb %xmm3, %xmm1 366; SSE41-NEXT: retq 367; 368; SSE42-LABEL: max_gt_v32i8: 369; SSE42: # %bb.0: 370; SSE42-NEXT: pmaxsb %xmm2, %xmm0 371; SSE42-NEXT: pmaxsb %xmm3, %xmm1 372; SSE42-NEXT: retq 373; 374; AVX1-LABEL: max_gt_v32i8: 375; AVX1: # %bb.0: 376; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 377; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 378; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2 379; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 380; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 381; AVX1-NEXT: retq 382; 383; AVX2-LABEL: max_gt_v32i8: 384; AVX2: # %bb.0: 385; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 386; AVX2-NEXT: retq 387; 388; AVX512-LABEL: max_gt_v32i8: 389; AVX512: # %bb.0: 390; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 391; AVX512-NEXT: retq 392 %1 = icmp sgt <32 x i8> %a, %b 393 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b 394 ret <32 x i8> %2 395} 396 397; 398; Signed Maximum (GE) 399; 400 401define <2 x i64> @max_ge_v2i64(<2 x i64> %a, <2 x i64> %b) { 402; SSE2-LABEL: max_ge_v2i64: 403; SSE2: # %bb.0: 404; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 405; SSE2-NEXT: movdqa %xmm1, %xmm3 406; SSE2-NEXT: pxor %xmm2, %xmm3 407; SSE2-NEXT: pxor %xmm0, %xmm2 408; SSE2-NEXT: movdqa %xmm2, %xmm4 409; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 410; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 411; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 412; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 413; SSE2-NEXT: pand %xmm5, %xmm2 414; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 415; SSE2-NEXT: por %xmm2, %xmm3 416; SSE2-NEXT: pand %xmm3, %xmm0 417; SSE2-NEXT: pandn %xmm1, %xmm3 418; SSE2-NEXT: por %xmm3, %xmm0 419; SSE2-NEXT: retq 420; 421; SSE41-LABEL: max_ge_v2i64: 422; SSE41: # %bb.0: 423; SSE41-NEXT: movdqa %xmm0, %xmm2 424; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] 425; SSE41-NEXT: movdqa %xmm1, %xmm0 426; SSE41-NEXT: pxor %xmm3, %xmm0 427; SSE41-NEXT: pxor %xmm2, %xmm3 428; SSE41-NEXT: movdqa %xmm3, %xmm4 429; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 430; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 431; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 432; SSE41-NEXT: pand %xmm4, %xmm0 433; 
SSE41-NEXT: por %xmm3, %xmm0 434; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 435; SSE41-NEXT: movapd %xmm1, %xmm0 436; SSE41-NEXT: retq 437; 438; SSE42-LABEL: max_ge_v2i64: 439; SSE42: # %bb.0: 440; SSE42-NEXT: movdqa %xmm0, %xmm2 441; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 442; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1 443; SSE42-NEXT: movapd %xmm1, %xmm0 444; SSE42-NEXT: retq 445; 446; AVX1-LABEL: max_ge_v2i64: 447; AVX1: # %bb.0: 448; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 449; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 450; AVX1-NEXT: retq 451; 452; AVX2-LABEL: max_ge_v2i64: 453; AVX2: # %bb.0: 454; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2 455; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 456; AVX2-NEXT: retq 457; 458; AVX512-LABEL: max_ge_v2i64: 459; AVX512: # %bb.0: 460; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 461; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 462; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 463; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 464; AVX512-NEXT: vzeroupper 465; AVX512-NEXT: retq 466 %1 = icmp sge <2 x i64> %a, %b 467 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b 468 ret <2 x i64> %2 469} 470 471define <4 x i64> @max_ge_v4i64(<4 x i64> %a, <4 x i64> %b) { 472; SSE2-LABEL: max_ge_v4i64: 473; SSE2: # %bb.0: 474; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] 475; SSE2-NEXT: movdqa %xmm2, %xmm5 476; SSE2-NEXT: pxor %xmm4, %xmm5 477; SSE2-NEXT: movdqa %xmm0, %xmm6 478; SSE2-NEXT: pxor %xmm4, %xmm6 479; SSE2-NEXT: movdqa %xmm6, %xmm7 480; SSE2-NEXT: pcmpgtd %xmm5, %xmm7 481; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] 482; SSE2-NEXT: pcmpeqd %xmm5, %xmm6 483; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3] 484; SSE2-NEXT: pand %xmm8, %xmm5 485; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3] 486; SSE2-NEXT: por %xmm5, %xmm6 487; SSE2-NEXT: pand %xmm6, %xmm0 488; SSE2-NEXT: pandn %xmm2, %xmm6 489; SSE2-NEXT: por %xmm6, %xmm0 490; SSE2-NEXT: movdqa %xmm3, %xmm2 491; SSE2-NEXT: pxor %xmm4, %xmm2 492; SSE2-NEXT: pxor %xmm1, %xmm4 493; SSE2-NEXT: movdqa %xmm4, %xmm5 494; SSE2-NEXT: pcmpgtd %xmm2, %xmm5 495; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 496; SSE2-NEXT: pcmpeqd %xmm2, %xmm4 497; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 498; SSE2-NEXT: pand %xmm6, %xmm2 499; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] 500; SSE2-NEXT: por %xmm2, %xmm4 501; SSE2-NEXT: pand %xmm4, %xmm1 502; SSE2-NEXT: pandn %xmm3, %xmm4 503; SSE2-NEXT: por %xmm4, %xmm1 504; SSE2-NEXT: retq 505; 506; SSE41-LABEL: max_ge_v4i64: 507; SSE41: # %bb.0: 508; SSE41-NEXT: movdqa %xmm0, %xmm4 509; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648] 510; SSE41-NEXT: movdqa %xmm2, %xmm6 511; SSE41-NEXT: pxor %xmm5, %xmm6 512; SSE41-NEXT: movdqa %xmm0, %xmm7 513; SSE41-NEXT: pxor %xmm5, %xmm7 514; SSE41-NEXT: movdqa %xmm7, %xmm0 515; SSE41-NEXT: pcmpeqd %xmm6, %xmm0 516; SSE41-NEXT: pcmpgtd %xmm6, %xmm7 517; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm7[0,0,2,2] 518; SSE41-NEXT: pand %xmm6, %xmm0 519; SSE41-NEXT: por %xmm7, %xmm0 520; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 521; SSE41-NEXT: movdqa %xmm3, %xmm0 522; SSE41-NEXT: pxor %xmm5, %xmm0 523; SSE41-NEXT: pxor %xmm1, %xmm5 524; SSE41-NEXT: movdqa %xmm5, %xmm4 525; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 526; SSE41-NEXT: pcmpgtd %xmm0, %xmm5 527; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] 528; SSE41-NEXT: pand %xmm4, %xmm0 529; SSE41-NEXT: por %xmm5, %xmm0 530; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3 531; SSE41-NEXT: movapd %xmm2, %xmm0 532; SSE41-NEXT: movapd %xmm3, %xmm1 533; 
SSE41-NEXT: retq 534; 535; SSE42-LABEL: max_ge_v4i64: 536; SSE42: # %bb.0: 537; SSE42-NEXT: movdqa %xmm0, %xmm4 538; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 539; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 540; SSE42-NEXT: movdqa %xmm1, %xmm0 541; SSE42-NEXT: pcmpgtq %xmm3, %xmm0 542; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 543; SSE42-NEXT: movapd %xmm2, %xmm0 544; SSE42-NEXT: movapd %xmm3, %xmm1 545; SSE42-NEXT: retq 546; 547; AVX1-LABEL: max_ge_v4i64: 548; AVX1: # %bb.0: 549; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 550; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 551; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 552; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3 553; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 554; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 555; AVX1-NEXT: retq 556; 557; AVX2-LABEL: max_ge_v4i64: 558; AVX2: # %bb.0: 559; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2 560; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 561; AVX2-NEXT: retq 562; 563; AVX512-LABEL: max_ge_v4i64: 564; AVX512: # %bb.0: 565; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 566; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 567; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 568; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 569; AVX512-NEXT: retq 570 %1 = icmp sge <4 x i64> %a, %b 571 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b 572 ret <4 x i64> %2 573} 574 575define <4 x i32> @max_ge_v4i32(<4 x i32> %a, <4 x i32> %b) { 576; SSE2-LABEL: max_ge_v4i32: 577; SSE2: # %bb.0: 578; SSE2-NEXT: movdqa %xmm0, %xmm2 579; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 580; SSE2-NEXT: pand %xmm2, %xmm0 581; SSE2-NEXT: pandn %xmm1, %xmm2 582; SSE2-NEXT: por %xmm0, %xmm2 583; SSE2-NEXT: movdqa %xmm2, %xmm0 584; SSE2-NEXT: retq 585; 586; SSE41-LABEL: max_ge_v4i32: 587; SSE41: # %bb.0: 588; SSE41-NEXT: pmaxsd %xmm1, %xmm0 589; SSE41-NEXT: retq 590; 591; SSE42-LABEL: max_ge_v4i32: 592; SSE42: # %bb.0: 593; SSE42-NEXT: pmaxsd %xmm1, %xmm0 594; SSE42-NEXT: retq 595; 596; AVX-LABEL: max_ge_v4i32: 597; AVX: # %bb.0: 598; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 599; AVX-NEXT: retq 600 %1 = icmp sge <4 x i32> %a, %b 601 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b 602 ret <4 x i32> %2 603} 604 605define <8 x i32> @max_ge_v8i32(<8 x i32> %a, <8 x i32> %b) { 606; SSE2-LABEL: max_ge_v8i32: 607; SSE2: # %bb.0: 608; SSE2-NEXT: movdqa %xmm0, %xmm4 609; SSE2-NEXT: pcmpgtd %xmm2, %xmm4 610; SSE2-NEXT: pand %xmm4, %xmm0 611; SSE2-NEXT: pandn %xmm2, %xmm4 612; SSE2-NEXT: por %xmm0, %xmm4 613; SSE2-NEXT: movdqa %xmm1, %xmm2 614; SSE2-NEXT: pcmpgtd %xmm3, %xmm2 615; SSE2-NEXT: pand %xmm2, %xmm1 616; SSE2-NEXT: pandn %xmm3, %xmm2 617; SSE2-NEXT: por %xmm1, %xmm2 618; SSE2-NEXT: movdqa %xmm4, %xmm0 619; SSE2-NEXT: movdqa %xmm2, %xmm1 620; SSE2-NEXT: retq 621; 622; SSE41-LABEL: max_ge_v8i32: 623; SSE41: # %bb.0: 624; SSE41-NEXT: pmaxsd %xmm2, %xmm0 625; SSE41-NEXT: pmaxsd %xmm3, %xmm1 626; SSE41-NEXT: retq 627; 628; SSE42-LABEL: max_ge_v8i32: 629; SSE42: # %bb.0: 630; SSE42-NEXT: pmaxsd %xmm2, %xmm0 631; SSE42-NEXT: pmaxsd %xmm3, %xmm1 632; SSE42-NEXT: retq 633; 634; AVX1-LABEL: max_ge_v8i32: 635; AVX1: # %bb.0: 636; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 637; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 638; AVX1-NEXT: vpmaxsd %xmm2, %xmm3, %xmm2 639; AVX1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 640; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 641; AVX1-NEXT: retq 642; 643; AVX2-LABEL: max_ge_v8i32: 644; AVX2: # %bb.0: 645; AVX2-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 646; AVX2-NEXT: retq 647; 648; AVX512-LABEL: max_ge_v8i32: 649; AVX512: # 
%bb.0: 650; AVX512-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 651; AVX512-NEXT: retq 652 %1 = icmp sge <8 x i32> %a, %b 653 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b 654 ret <8 x i32> %2 655} 656 657define <8 x i16> @max_ge_v8i16(<8 x i16> %a, <8 x i16> %b) { 658; SSE-LABEL: max_ge_v8i16: 659; SSE: # %bb.0: 660; SSE-NEXT: pmaxsw %xmm1, %xmm0 661; SSE-NEXT: retq 662; 663; AVX-LABEL: max_ge_v8i16: 664; AVX: # %bb.0: 665; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 666; AVX-NEXT: retq 667 %1 = icmp sge <8 x i16> %a, %b 668 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b 669 ret <8 x i16> %2 670} 671 672define <16 x i16> @max_ge_v16i16(<16 x i16> %a, <16 x i16> %b) { 673; SSE-LABEL: max_ge_v16i16: 674; SSE: # %bb.0: 675; SSE-NEXT: pmaxsw %xmm2, %xmm0 676; SSE-NEXT: pmaxsw %xmm3, %xmm1 677; SSE-NEXT: retq 678; 679; AVX1-LABEL: max_ge_v16i16: 680; AVX1: # %bb.0: 681; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 682; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 683; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2 684; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 685; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 686; AVX1-NEXT: retq 687; 688; AVX2-LABEL: max_ge_v16i16: 689; AVX2: # %bb.0: 690; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 691; AVX2-NEXT: retq 692; 693; AVX512-LABEL: max_ge_v16i16: 694; AVX512: # %bb.0: 695; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 696; AVX512-NEXT: retq 697 %1 = icmp sge <16 x i16> %a, %b 698 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b 699 ret <16 x i16> %2 700} 701 702define <16 x i8> @max_ge_v16i8(<16 x i8> %a, <16 x i8> %b) { 703; SSE2-LABEL: max_ge_v16i8: 704; SSE2: # %bb.0: 705; SSE2-NEXT: movdqa %xmm0, %xmm2 706; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 707; SSE2-NEXT: pand %xmm2, %xmm0 708; SSE2-NEXT: pandn %xmm1, %xmm2 709; SSE2-NEXT: por %xmm0, %xmm2 710; SSE2-NEXT: movdqa %xmm2, %xmm0 711; SSE2-NEXT: retq 712; 713; SSE41-LABEL: max_ge_v16i8: 714; SSE41: # %bb.0: 715; SSE41-NEXT: pmaxsb %xmm1, %xmm0 716; SSE41-NEXT: retq 717; 718; SSE42-LABEL: max_ge_v16i8: 719; SSE42: # %bb.0: 720; SSE42-NEXT: pmaxsb %xmm1, %xmm0 721; SSE42-NEXT: retq 722; 723; AVX-LABEL: max_ge_v16i8: 724; AVX: # %bb.0: 725; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 726; AVX-NEXT: retq 727 %1 = icmp sge <16 x i8> %a, %b 728 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b 729 ret <16 x i8> %2 730} 731 732define <32 x i8> @max_ge_v32i8(<32 x i8> %a, <32 x i8> %b) { 733; SSE2-LABEL: max_ge_v32i8: 734; SSE2: # %bb.0: 735; SSE2-NEXT: movdqa %xmm0, %xmm4 736; SSE2-NEXT: pcmpgtb %xmm2, %xmm4 737; SSE2-NEXT: pand %xmm4, %xmm0 738; SSE2-NEXT: pandn %xmm2, %xmm4 739; SSE2-NEXT: por %xmm0, %xmm4 740; SSE2-NEXT: movdqa %xmm1, %xmm2 741; SSE2-NEXT: pcmpgtb %xmm3, %xmm2 742; SSE2-NEXT: pand %xmm2, %xmm1 743; SSE2-NEXT: pandn %xmm3, %xmm2 744; SSE2-NEXT: por %xmm1, %xmm2 745; SSE2-NEXT: movdqa %xmm4, %xmm0 746; SSE2-NEXT: movdqa %xmm2, %xmm1 747; SSE2-NEXT: retq 748; 749; SSE41-LABEL: max_ge_v32i8: 750; SSE41: # %bb.0: 751; SSE41-NEXT: pmaxsb %xmm2, %xmm0 752; SSE41-NEXT: pmaxsb %xmm3, %xmm1 753; SSE41-NEXT: retq 754; 755; SSE42-LABEL: max_ge_v32i8: 756; SSE42: # %bb.0: 757; SSE42-NEXT: pmaxsb %xmm2, %xmm0 758; SSE42-NEXT: pmaxsb %xmm3, %xmm1 759; SSE42-NEXT: retq 760; 761; AVX1-LABEL: max_ge_v32i8: 762; AVX1: # %bb.0: 763; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 764; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 765; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2 766; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 767; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 768; AVX1-NEXT: retq 769; 770; AVX2-LABEL: max_ge_v32i8: 771; AVX2: # %bb.0: 772; 
AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 773; AVX2-NEXT: retq 774; 775; AVX512-LABEL: max_ge_v32i8: 776; AVX512: # %bb.0: 777; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 778; AVX512-NEXT: retq 779 %1 = icmp sge <32 x i8> %a, %b 780 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b 781 ret <32 x i8> %2 782} 783 784; 785; Signed Minimum (LT) 786; 787 788define <2 x i64> @min_lt_v2i64(<2 x i64> %a, <2 x i64> %b) { 789; SSE2-LABEL: min_lt_v2i64: 790; SSE2: # %bb.0: 791; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 792; SSE2-NEXT: movdqa %xmm0, %xmm3 793; SSE2-NEXT: pxor %xmm2, %xmm3 794; SSE2-NEXT: pxor %xmm1, %xmm2 795; SSE2-NEXT: movdqa %xmm2, %xmm4 796; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 797; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 798; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 799; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 800; SSE2-NEXT: pand %xmm5, %xmm2 801; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 802; SSE2-NEXT: por %xmm2, %xmm3 803; SSE2-NEXT: pand %xmm3, %xmm0 804; SSE2-NEXT: pandn %xmm1, %xmm3 805; SSE2-NEXT: por %xmm3, %xmm0 806; SSE2-NEXT: retq 807; 808; SSE41-LABEL: min_lt_v2i64: 809; SSE41: # %bb.0: 810; SSE41-NEXT: movdqa %xmm0, %xmm2 811; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] 812; SSE41-NEXT: pxor %xmm3, %xmm0 813; SSE41-NEXT: pxor %xmm1, %xmm3 814; SSE41-NEXT: movdqa %xmm3, %xmm4 815; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 816; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 817; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 818; SSE41-NEXT: pand %xmm4, %xmm0 819; SSE41-NEXT: por %xmm3, %xmm0 820; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 821; SSE41-NEXT: movapd %xmm1, %xmm0 822; SSE41-NEXT: retq 823; 824; SSE42-LABEL: min_lt_v2i64: 825; SSE42: # %bb.0: 826; SSE42-NEXT: movdqa %xmm0, %xmm2 827; SSE42-NEXT: movdqa %xmm1, %xmm0 828; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 829; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1 830; SSE42-NEXT: movapd %xmm1, %xmm0 831; SSE42-NEXT: retq 832; 833; AVX1-LABEL: min_lt_v2i64: 834; AVX1: # %bb.0: 835; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 836; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 837; AVX1-NEXT: retq 838; 839; AVX2-LABEL: min_lt_v2i64: 840; AVX2: # %bb.0: 841; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 842; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 843; AVX2-NEXT: retq 844; 845; AVX512-LABEL: min_lt_v2i64: 846; AVX512: # %bb.0: 847; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 848; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 849; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0 850; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 851; AVX512-NEXT: vzeroupper 852; AVX512-NEXT: retq 853 %1 = icmp slt <2 x i64> %a, %b 854 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b 855 ret <2 x i64> %2 856} 857 858define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) { 859; SSE2-LABEL: min_lt_v4i64: 860; SSE2: # %bb.0: 861; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] 862; SSE2-NEXT: movdqa %xmm0, %xmm5 863; SSE2-NEXT: pxor %xmm4, %xmm5 864; SSE2-NEXT: movdqa %xmm2, %xmm6 865; SSE2-NEXT: pxor %xmm4, %xmm6 866; SSE2-NEXT: movdqa %xmm6, %xmm7 867; SSE2-NEXT: pcmpgtd %xmm5, %xmm7 868; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] 869; SSE2-NEXT: pcmpeqd %xmm5, %xmm6 870; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3] 871; SSE2-NEXT: pand %xmm8, %xmm5 872; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3] 873; SSE2-NEXT: por %xmm5, %xmm6 874; SSE2-NEXT: pand %xmm6, %xmm0 875; SSE2-NEXT: pandn %xmm2, %xmm6 876; SSE2-NEXT: por %xmm6, %xmm0 877; SSE2-NEXT: movdqa %xmm1, %xmm2 878; SSE2-NEXT: 
pxor %xmm4, %xmm2 879; SSE2-NEXT: pxor %xmm3, %xmm4 880; SSE2-NEXT: movdqa %xmm4, %xmm5 881; SSE2-NEXT: pcmpgtd %xmm2, %xmm5 882; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 883; SSE2-NEXT: pcmpeqd %xmm2, %xmm4 884; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 885; SSE2-NEXT: pand %xmm6, %xmm2 886; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] 887; SSE2-NEXT: por %xmm2, %xmm4 888; SSE2-NEXT: pand %xmm4, %xmm1 889; SSE2-NEXT: pandn %xmm3, %xmm4 890; SSE2-NEXT: por %xmm4, %xmm1 891; SSE2-NEXT: retq 892; 893; SSE41-LABEL: min_lt_v4i64: 894; SSE41: # %bb.0: 895; SSE41-NEXT: movdqa %xmm0, %xmm4 896; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648] 897; SSE41-NEXT: pxor %xmm5, %xmm0 898; SSE41-NEXT: movdqa %xmm2, %xmm6 899; SSE41-NEXT: pxor %xmm5, %xmm6 900; SSE41-NEXT: movdqa %xmm6, %xmm7 901; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 902; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 903; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 904; SSE41-NEXT: pand %xmm7, %xmm0 905; SSE41-NEXT: por %xmm6, %xmm0 906; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 907; SSE41-NEXT: movdqa %xmm1, %xmm0 908; SSE41-NEXT: pxor %xmm5, %xmm0 909; SSE41-NEXT: pxor %xmm3, %xmm5 910; SSE41-NEXT: movdqa %xmm5, %xmm4 911; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 912; SSE41-NEXT: pcmpgtd %xmm0, %xmm5 913; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] 914; SSE41-NEXT: pand %xmm4, %xmm0 915; SSE41-NEXT: por %xmm5, %xmm0 916; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3 917; SSE41-NEXT: movapd %xmm2, %xmm0 918; SSE41-NEXT: movapd %xmm3, %xmm1 919; SSE41-NEXT: retq 920; 921; SSE42-LABEL: min_lt_v4i64: 922; SSE42: # %bb.0: 923; SSE42-NEXT: movdqa %xmm0, %xmm4 924; SSE42-NEXT: movdqa %xmm2, %xmm0 925; SSE42-NEXT: pcmpgtq %xmm4, %xmm0 926; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 927; SSE42-NEXT: movdqa %xmm3, %xmm0 928; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 929; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 930; SSE42-NEXT: movapd %xmm2, %xmm0 931; SSE42-NEXT: movapd %xmm3, %xmm1 932; SSE42-NEXT: retq 933; 934; AVX1-LABEL: min_lt_v4i64: 935; AVX1: # %bb.0: 936; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 937; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 938; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 939; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3 940; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 941; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 942; AVX1-NEXT: retq 943; 944; AVX2-LABEL: min_lt_v4i64: 945; AVX2: # %bb.0: 946; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 947; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 948; AVX2-NEXT: retq 949; 950; AVX512-LABEL: min_lt_v4i64: 951; AVX512: # %bb.0: 952; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 953; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 954; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0 955; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 956; AVX512-NEXT: retq 957 %1 = icmp slt <4 x i64> %a, %b 958 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b 959 ret <4 x i64> %2 960} 961 962define <4 x i32> @min_lt_v4i32(<4 x i32> %a, <4 x i32> %b) { 963; SSE2-LABEL: min_lt_v4i32: 964; SSE2: # %bb.0: 965; SSE2-NEXT: movdqa %xmm1, %xmm2 966; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 967; SSE2-NEXT: pand %xmm2, %xmm0 968; SSE2-NEXT: pandn %xmm1, %xmm2 969; SSE2-NEXT: por %xmm2, %xmm0 970; SSE2-NEXT: retq 971; 972; SSE41-LABEL: min_lt_v4i32: 973; SSE41: # %bb.0: 974; SSE41-NEXT: pminsd %xmm1, %xmm0 975; SSE41-NEXT: retq 976; 977; SSE42-LABEL: min_lt_v4i32: 978; SSE42: # %bb.0: 979; SSE42-NEXT: pminsd %xmm1, %xmm0 980; SSE42-NEXT: retq 981; 982; AVX-LABEL: min_lt_v4i32: 983; AVX: # %bb.0: 984; 
AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 985; AVX-NEXT: retq 986 %1 = icmp slt <4 x i32> %a, %b 987 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b 988 ret <4 x i32> %2 989} 990 991define <8 x i32> @min_lt_v8i32(<8 x i32> %a, <8 x i32> %b) { 992; SSE2-LABEL: min_lt_v8i32: 993; SSE2: # %bb.0: 994; SSE2-NEXT: movdqa %xmm2, %xmm4 995; SSE2-NEXT: pcmpgtd %xmm0, %xmm4 996; SSE2-NEXT: pand %xmm4, %xmm0 997; SSE2-NEXT: pandn %xmm2, %xmm4 998; SSE2-NEXT: por %xmm4, %xmm0 999; SSE2-NEXT: movdqa %xmm3, %xmm2 1000; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1001; SSE2-NEXT: pand %xmm2, %xmm1 1002; SSE2-NEXT: pandn %xmm3, %xmm2 1003; SSE2-NEXT: por %xmm2, %xmm1 1004; SSE2-NEXT: retq 1005; 1006; SSE41-LABEL: min_lt_v8i32: 1007; SSE41: # %bb.0: 1008; SSE41-NEXT: pminsd %xmm2, %xmm0 1009; SSE41-NEXT: pminsd %xmm3, %xmm1 1010; SSE41-NEXT: retq 1011; 1012; SSE42-LABEL: min_lt_v8i32: 1013; SSE42: # %bb.0: 1014; SSE42-NEXT: pminsd %xmm2, %xmm0 1015; SSE42-NEXT: pminsd %xmm3, %xmm1 1016; SSE42-NEXT: retq 1017; 1018; AVX1-LABEL: min_lt_v8i32: 1019; AVX1: # %bb.0: 1020; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1021; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1022; AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2 1023; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1024; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1025; AVX1-NEXT: retq 1026; 1027; AVX2-LABEL: min_lt_v8i32: 1028; AVX2: # %bb.0: 1029; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0 1030; AVX2-NEXT: retq 1031; 1032; AVX512-LABEL: min_lt_v8i32: 1033; AVX512: # %bb.0: 1034; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0 1035; AVX512-NEXT: retq 1036 %1 = icmp slt <8 x i32> %a, %b 1037 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b 1038 ret <8 x i32> %2 1039} 1040 1041define <8 x i16> @min_lt_v8i16(<8 x i16> %a, <8 x i16> %b) { 1042; SSE-LABEL: min_lt_v8i16: 1043; SSE: # %bb.0: 1044; SSE-NEXT: pminsw %xmm1, %xmm0 1045; SSE-NEXT: retq 1046; 1047; AVX-LABEL: min_lt_v8i16: 1048; AVX: # %bb.0: 1049; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 1050; AVX-NEXT: retq 1051 %1 = icmp slt <8 x i16> %a, %b 1052 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b 1053 ret <8 x i16> %2 1054} 1055 1056define <16 x i16> @min_lt_v16i16(<16 x i16> %a, <16 x i16> %b) { 1057; SSE-LABEL: min_lt_v16i16: 1058; SSE: # %bb.0: 1059; SSE-NEXT: pminsw %xmm2, %xmm0 1060; SSE-NEXT: pminsw %xmm3, %xmm1 1061; SSE-NEXT: retq 1062; 1063; AVX1-LABEL: min_lt_v16i16: 1064; AVX1: # %bb.0: 1065; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1066; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1067; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2 1068; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 1069; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1070; AVX1-NEXT: retq 1071; 1072; AVX2-LABEL: min_lt_v16i16: 1073; AVX2: # %bb.0: 1074; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0 1075; AVX2-NEXT: retq 1076; 1077; AVX512-LABEL: min_lt_v16i16: 1078; AVX512: # %bb.0: 1079; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0 1080; AVX512-NEXT: retq 1081 %1 = icmp slt <16 x i16> %a, %b 1082 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b 1083 ret <16 x i16> %2 1084} 1085 1086define <16 x i8> @min_lt_v16i8(<16 x i8> %a, <16 x i8> %b) { 1087; SSE2-LABEL: min_lt_v16i8: 1088; SSE2: # %bb.0: 1089; SSE2-NEXT: movdqa %xmm1, %xmm2 1090; SSE2-NEXT: pcmpgtb %xmm0, %xmm2 1091; SSE2-NEXT: pand %xmm2, %xmm0 1092; SSE2-NEXT: pandn %xmm1, %xmm2 1093; SSE2-NEXT: por %xmm2, %xmm0 1094; SSE2-NEXT: retq 1095; 1096; SSE41-LABEL: min_lt_v16i8: 1097; SSE41: # %bb.0: 1098; SSE41-NEXT: pminsb %xmm1, %xmm0 1099; SSE41-NEXT: retq 1100; 1101; SSE42-LABEL: min_lt_v16i8: 1102; SSE42: # %bb.0: 1103; 
SSE42-NEXT: pminsb %xmm1, %xmm0 1104; SSE42-NEXT: retq 1105; 1106; AVX-LABEL: min_lt_v16i8: 1107; AVX: # %bb.0: 1108; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1109; AVX-NEXT: retq 1110 %1 = icmp slt <16 x i8> %a, %b 1111 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b 1112 ret <16 x i8> %2 1113} 1114 1115define <32 x i8> @min_lt_v32i8(<32 x i8> %a, <32 x i8> %b) { 1116; SSE2-LABEL: min_lt_v32i8: 1117; SSE2: # %bb.0: 1118; SSE2-NEXT: movdqa %xmm2, %xmm4 1119; SSE2-NEXT: pcmpgtb %xmm0, %xmm4 1120; SSE2-NEXT: pand %xmm4, %xmm0 1121; SSE2-NEXT: pandn %xmm2, %xmm4 1122; SSE2-NEXT: por %xmm4, %xmm0 1123; SSE2-NEXT: movdqa %xmm3, %xmm2 1124; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1125; SSE2-NEXT: pand %xmm2, %xmm1 1126; SSE2-NEXT: pandn %xmm3, %xmm2 1127; SSE2-NEXT: por %xmm2, %xmm1 1128; SSE2-NEXT: retq 1129; 1130; SSE41-LABEL: min_lt_v32i8: 1131; SSE41: # %bb.0: 1132; SSE41-NEXT: pminsb %xmm2, %xmm0 1133; SSE41-NEXT: pminsb %xmm3, %xmm1 1134; SSE41-NEXT: retq 1135; 1136; SSE42-LABEL: min_lt_v32i8: 1137; SSE42: # %bb.0: 1138; SSE42-NEXT: pminsb %xmm2, %xmm0 1139; SSE42-NEXT: pminsb %xmm3, %xmm1 1140; SSE42-NEXT: retq 1141; 1142; AVX1-LABEL: min_lt_v32i8: 1143; AVX1: # %bb.0: 1144; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1145; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1146; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2 1147; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1148; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1149; AVX1-NEXT: retq 1150; 1151; AVX2-LABEL: min_lt_v32i8: 1152; AVX2: # %bb.0: 1153; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 1154; AVX2-NEXT: retq 1155; 1156; AVX512-LABEL: min_lt_v32i8: 1157; AVX512: # %bb.0: 1158; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0 1159; AVX512-NEXT: retq 1160 %1 = icmp slt <32 x i8> %a, %b 1161 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b 1162 ret <32 x i8> %2 1163} 1164 1165; 1166; Signed Minimum (LE) 1167; 1168 1169define <2 x i64> @min_le_v2i64(<2 x i64> %a, <2 x i64> %b) { 1170; SSE2-LABEL: min_le_v2i64: 1171; SSE2: # %bb.0: 1172; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648] 1173; SSE2-NEXT: movdqa %xmm0, %xmm3 1174; SSE2-NEXT: pxor %xmm2, %xmm3 1175; SSE2-NEXT: pxor %xmm1, %xmm2 1176; SSE2-NEXT: movdqa %xmm2, %xmm4 1177; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 1178; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2] 1179; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 1180; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1181; SSE2-NEXT: pand %xmm5, %xmm2 1182; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] 1183; SSE2-NEXT: por %xmm2, %xmm3 1184; SSE2-NEXT: pand %xmm3, %xmm0 1185; SSE2-NEXT: pandn %xmm1, %xmm3 1186; SSE2-NEXT: por %xmm3, %xmm0 1187; SSE2-NEXT: retq 1188; 1189; SSE41-LABEL: min_le_v2i64: 1190; SSE41: # %bb.0: 1191; SSE41-NEXT: movdqa %xmm0, %xmm2 1192; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648] 1193; SSE41-NEXT: pxor %xmm3, %xmm0 1194; SSE41-NEXT: pxor %xmm1, %xmm3 1195; SSE41-NEXT: movdqa %xmm3, %xmm4 1196; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 1197; SSE41-NEXT: pcmpgtd %xmm0, %xmm3 1198; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] 1199; SSE41-NEXT: pand %xmm4, %xmm0 1200; SSE41-NEXT: por %xmm3, %xmm0 1201; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 1202; SSE41-NEXT: movapd %xmm1, %xmm0 1203; SSE41-NEXT: retq 1204; 1205; SSE42-LABEL: min_le_v2i64: 1206; SSE42: # %bb.0: 1207; SSE42-NEXT: movdqa %xmm0, %xmm2 1208; SSE42-NEXT: movdqa %xmm1, %xmm0 1209; SSE42-NEXT: pcmpgtq %xmm2, %xmm0 1210; SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1 1211; SSE42-NEXT: movapd %xmm1, %xmm0 1212; SSE42-NEXT: retq 1213; 1214; AVX1-LABEL: min_le_v2i64: 1215; AVX1: # 
%bb.0: 1216; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1217; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1218; AVX1-NEXT: retq 1219; 1220; AVX2-LABEL: min_le_v2i64: 1221; AVX2: # %bb.0: 1222; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2 1223; AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 1224; AVX2-NEXT: retq 1225; 1226; AVX512-LABEL: min_le_v2i64: 1227; AVX512: # %bb.0: 1228; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1229; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1230; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0 1231; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1232; AVX512-NEXT: vzeroupper 1233; AVX512-NEXT: retq 1234 %1 = icmp sle <2 x i64> %a, %b 1235 %2 = select <2 x i1> %1, <2 x i64> %a, <2 x i64> %b 1236 ret <2 x i64> %2 1237} 1238 1239define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) { 1240; SSE2-LABEL: min_le_v4i64: 1241; SSE2: # %bb.0: 1242; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] 1243; SSE2-NEXT: movdqa %xmm0, %xmm5 1244; SSE2-NEXT: pxor %xmm4, %xmm5 1245; SSE2-NEXT: movdqa %xmm2, %xmm6 1246; SSE2-NEXT: pxor %xmm4, %xmm6 1247; SSE2-NEXT: movdqa %xmm6, %xmm7 1248; SSE2-NEXT: pcmpgtd %xmm5, %xmm7 1249; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] 1250; SSE2-NEXT: pcmpeqd %xmm5, %xmm6 1251; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3] 1252; SSE2-NEXT: pand %xmm8, %xmm5 1253; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3] 1254; SSE2-NEXT: por %xmm5, %xmm6 1255; SSE2-NEXT: pand %xmm6, %xmm0 1256; SSE2-NEXT: pandn %xmm2, %xmm6 1257; SSE2-NEXT: por %xmm6, %xmm0 1258; SSE2-NEXT: movdqa %xmm1, %xmm2 1259; SSE2-NEXT: pxor %xmm4, %xmm2 1260; SSE2-NEXT: pxor %xmm3, %xmm4 1261; SSE2-NEXT: movdqa %xmm4, %xmm5 1262; SSE2-NEXT: pcmpgtd %xmm2, %xmm5 1263; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] 1264; SSE2-NEXT: pcmpeqd %xmm2, %xmm4 1265; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 1266; SSE2-NEXT: pand %xmm6, %xmm2 1267; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] 1268; SSE2-NEXT: por %xmm2, %xmm4 1269; SSE2-NEXT: pand %xmm4, %xmm1 1270; SSE2-NEXT: pandn %xmm3, %xmm4 1271; SSE2-NEXT: por %xmm4, %xmm1 1272; SSE2-NEXT: retq 1273; 1274; SSE41-LABEL: min_le_v4i64: 1275; SSE41: # %bb.0: 1276; SSE41-NEXT: movdqa %xmm0, %xmm4 1277; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648] 1278; SSE41-NEXT: pxor %xmm5, %xmm0 1279; SSE41-NEXT: movdqa %xmm2, %xmm6 1280; SSE41-NEXT: pxor %xmm5, %xmm6 1281; SSE41-NEXT: movdqa %xmm6, %xmm7 1282; SSE41-NEXT: pcmpeqd %xmm0, %xmm7 1283; SSE41-NEXT: pcmpgtd %xmm0, %xmm6 1284; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,0,2,2] 1285; SSE41-NEXT: pand %xmm7, %xmm0 1286; SSE41-NEXT: por %xmm6, %xmm0 1287; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1288; SSE41-NEXT: movdqa %xmm1, %xmm0 1289; SSE41-NEXT: pxor %xmm5, %xmm0 1290; SSE41-NEXT: pxor %xmm3, %xmm5 1291; SSE41-NEXT: movdqa %xmm5, %xmm4 1292; SSE41-NEXT: pcmpeqd %xmm0, %xmm4 1293; SSE41-NEXT: pcmpgtd %xmm0, %xmm5 1294; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm5[0,0,2,2] 1295; SSE41-NEXT: pand %xmm4, %xmm0 1296; SSE41-NEXT: por %xmm5, %xmm0 1297; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1298; SSE41-NEXT: movapd %xmm2, %xmm0 1299; SSE41-NEXT: movapd %xmm3, %xmm1 1300; SSE41-NEXT: retq 1301; 1302; SSE42-LABEL: min_le_v4i64: 1303; SSE42: # %bb.0: 1304; SSE42-NEXT: movdqa %xmm0, %xmm4 1305; SSE42-NEXT: movdqa %xmm2, %xmm0 1306; SSE42-NEXT: pcmpgtq %xmm4, %xmm0 1307; SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2 1308; SSE42-NEXT: movdqa %xmm3, %xmm0 1309; SSE42-NEXT: pcmpgtq %xmm1, %xmm0 1310; SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3 1311; 
SSE42-NEXT: movapd %xmm2, %xmm0 1312; SSE42-NEXT: movapd %xmm3, %xmm1 1313; SSE42-NEXT: retq 1314; 1315; AVX1-LABEL: min_le_v4i64: 1316; AVX1: # %bb.0: 1317; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 1318; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 1319; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 1320; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3 1321; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 1322; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1323; AVX1-NEXT: retq 1324; 1325; AVX2-LABEL: min_le_v4i64: 1326; AVX2: # %bb.0: 1327; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2 1328; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 1329; AVX2-NEXT: retq 1330; 1331; AVX512-LABEL: min_le_v4i64: 1332; AVX512: # %bb.0: 1333; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1334; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1335; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0 1336; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1337; AVX512-NEXT: retq 1338 %1 = icmp sle <4 x i64> %a, %b 1339 %2 = select <4 x i1> %1, <4 x i64> %a, <4 x i64> %b 1340 ret <4 x i64> %2 1341} 1342 1343define <4 x i32> @min_le_v4i32(<4 x i32> %a, <4 x i32> %b) { 1344; SSE2-LABEL: min_le_v4i32: 1345; SSE2: # %bb.0: 1346; SSE2-NEXT: movdqa %xmm1, %xmm2 1347; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 1348; SSE2-NEXT: pand %xmm2, %xmm0 1349; SSE2-NEXT: pandn %xmm1, %xmm2 1350; SSE2-NEXT: por %xmm2, %xmm0 1351; SSE2-NEXT: retq 1352; 1353; SSE41-LABEL: min_le_v4i32: 1354; SSE41: # %bb.0: 1355; SSE41-NEXT: pminsd %xmm1, %xmm0 1356; SSE41-NEXT: retq 1357; 1358; SSE42-LABEL: min_le_v4i32: 1359; SSE42: # %bb.0: 1360; SSE42-NEXT: pminsd %xmm1, %xmm0 1361; SSE42-NEXT: retq 1362; 1363; AVX-LABEL: min_le_v4i32: 1364; AVX: # %bb.0: 1365; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1366; AVX-NEXT: retq 1367 %1 = icmp sle <4 x i32> %a, %b 1368 %2 = select <4 x i1> %1, <4 x i32> %a, <4 x i32> %b 1369 ret <4 x i32> %2 1370} 1371 1372define <8 x i32> @min_le_v8i32(<8 x i32> %a, <8 x i32> %b) { 1373; SSE2-LABEL: min_le_v8i32: 1374; SSE2: # %bb.0: 1375; SSE2-NEXT: movdqa %xmm2, %xmm4 1376; SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1377; SSE2-NEXT: pand %xmm4, %xmm0 1378; SSE2-NEXT: pandn %xmm2, %xmm4 1379; SSE2-NEXT: por %xmm4, %xmm0 1380; SSE2-NEXT: movdqa %xmm3, %xmm2 1381; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1382; SSE2-NEXT: pand %xmm2, %xmm1 1383; SSE2-NEXT: pandn %xmm3, %xmm2 1384; SSE2-NEXT: por %xmm2, %xmm1 1385; SSE2-NEXT: retq 1386; 1387; SSE41-LABEL: min_le_v8i32: 1388; SSE41: # %bb.0: 1389; SSE41-NEXT: pminsd %xmm2, %xmm0 1390; SSE41-NEXT: pminsd %xmm3, %xmm1 1391; SSE41-NEXT: retq 1392; 1393; SSE42-LABEL: min_le_v8i32: 1394; SSE42: # %bb.0: 1395; SSE42-NEXT: pminsd %xmm2, %xmm0 1396; SSE42-NEXT: pminsd %xmm3, %xmm1 1397; SSE42-NEXT: retq 1398; 1399; AVX1-LABEL: min_le_v8i32: 1400; AVX1: # %bb.0: 1401; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1402; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1403; AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2 1404; AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 1405; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1406; AVX1-NEXT: retq 1407; 1408; AVX2-LABEL: min_le_v8i32: 1409; AVX2: # %bb.0: 1410; AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0 1411; AVX2-NEXT: retq 1412; 1413; AVX512-LABEL: min_le_v8i32: 1414; AVX512: # %bb.0: 1415; AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0 1416; AVX512-NEXT: retq 1417 %1 = icmp sle <8 x i32> %a, %b 1418 %2 = select <8 x i1> %1, <8 x i32> %a, <8 x i32> %b 1419 ret <8 x i32> %2 1420} 1421 1422define <8 x i16> @min_le_v8i16(<8 x i16> %a, <8 x i16> %b) { 1423; SSE-LABEL: min_le_v8i16: 1424; SSE: # %bb.0: 1425; 
SSE-NEXT: pminsw %xmm1, %xmm0 1426; SSE-NEXT: retq 1427; 1428; AVX-LABEL: min_le_v8i16: 1429; AVX: # %bb.0: 1430; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 1431; AVX-NEXT: retq 1432 %1 = icmp sle <8 x i16> %a, %b 1433 %2 = select <8 x i1> %1, <8 x i16> %a, <8 x i16> %b 1434 ret <8 x i16> %2 1435} 1436 1437define <16 x i16> @min_le_v16i16(<16 x i16> %a, <16 x i16> %b) { 1438; SSE-LABEL: min_le_v16i16: 1439; SSE: # %bb.0: 1440; SSE-NEXT: pminsw %xmm2, %xmm0 1441; SSE-NEXT: pminsw %xmm3, %xmm1 1442; SSE-NEXT: retq 1443; 1444; AVX1-LABEL: min_le_v16i16: 1445; AVX1: # %bb.0: 1446; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1447; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1448; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2 1449; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 1450; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1451; AVX1-NEXT: retq 1452; 1453; AVX2-LABEL: min_le_v16i16: 1454; AVX2: # %bb.0: 1455; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0 1456; AVX2-NEXT: retq 1457; 1458; AVX512-LABEL: min_le_v16i16: 1459; AVX512: # %bb.0: 1460; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0 1461; AVX512-NEXT: retq 1462 %1 = icmp sle <16 x i16> %a, %b 1463 %2 = select <16 x i1> %1, <16 x i16> %a, <16 x i16> %b 1464 ret <16 x i16> %2 1465} 1466 1467define <16 x i8> @min_le_v16i8(<16 x i8> %a, <16 x i8> %b) { 1468; SSE2-LABEL: min_le_v16i8: 1469; SSE2: # %bb.0: 1470; SSE2-NEXT: movdqa %xmm1, %xmm2 1471; SSE2-NEXT: pcmpgtb %xmm0, %xmm2 1472; SSE2-NEXT: pand %xmm2, %xmm0 1473; SSE2-NEXT: pandn %xmm1, %xmm2 1474; SSE2-NEXT: por %xmm2, %xmm0 1475; SSE2-NEXT: retq 1476; 1477; SSE41-LABEL: min_le_v16i8: 1478; SSE41: # %bb.0: 1479; SSE41-NEXT: pminsb %xmm1, %xmm0 1480; SSE41-NEXT: retq 1481; 1482; SSE42-LABEL: min_le_v16i8: 1483; SSE42: # %bb.0: 1484; SSE42-NEXT: pminsb %xmm1, %xmm0 1485; SSE42-NEXT: retq 1486; 1487; AVX-LABEL: min_le_v16i8: 1488; AVX: # %bb.0: 1489; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1490; AVX-NEXT: retq 1491 %1 = icmp sle <16 x i8> %a, %b 1492 %2 = select <16 x i1> %1, <16 x i8> %a, <16 x i8> %b 1493 ret <16 x i8> %2 1494} 1495 1496define <32 x i8> @min_le_v32i8(<32 x i8> %a, <32 x i8> %b) { 1497; SSE2-LABEL: min_le_v32i8: 1498; SSE2: # %bb.0: 1499; SSE2-NEXT: movdqa %xmm2, %xmm4 1500; SSE2-NEXT: pcmpgtb %xmm0, %xmm4 1501; SSE2-NEXT: pand %xmm4, %xmm0 1502; SSE2-NEXT: pandn %xmm2, %xmm4 1503; SSE2-NEXT: por %xmm4, %xmm0 1504; SSE2-NEXT: movdqa %xmm3, %xmm2 1505; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 1506; SSE2-NEXT: pand %xmm2, %xmm1 1507; SSE2-NEXT: pandn %xmm3, %xmm2 1508; SSE2-NEXT: por %xmm2, %xmm1 1509; SSE2-NEXT: retq 1510; 1511; SSE41-LABEL: min_le_v32i8: 1512; SSE41: # %bb.0: 1513; SSE41-NEXT: pminsb %xmm2, %xmm0 1514; SSE41-NEXT: pminsb %xmm3, %xmm1 1515; SSE41-NEXT: retq 1516; 1517; SSE42-LABEL: min_le_v32i8: 1518; SSE42: # %bb.0: 1519; SSE42-NEXT: pminsb %xmm2, %xmm0 1520; SSE42-NEXT: pminsb %xmm3, %xmm1 1521; SSE42-NEXT: retq 1522; 1523; AVX1-LABEL: min_le_v32i8: 1524; AVX1: # %bb.0: 1525; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 1526; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 1527; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2 1528; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 1529; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1530; AVX1-NEXT: retq 1531; 1532; AVX2-LABEL: min_le_v32i8: 1533; AVX2: # %bb.0: 1534; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 1535; AVX2-NEXT: retq 1536; 1537; AVX512-LABEL: min_le_v32i8: 1538; AVX512: # %bb.0: 1539; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0 1540; AVX512-NEXT: retq 1541 %1 = icmp sle <32 x i8> %a, %b 1542 %2 = select <32 x i1> %1, <32 x i8> %a, <32 x i8> %b 1543 ret <32 x i8> %2 
1544} 1545 1546; 1547; Constant Folding 1548; 1549 1550define <2 x i64> @max_gt_v2i64c() { 1551; SSE-LABEL: max_gt_v2i64c: 1552; SSE: # %bb.0: 1553; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,7] 1554; SSE-NEXT: retq 1555; 1556; AVX-LABEL: max_gt_v2i64c: 1557; AVX: # %bb.0: 1558; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551615,7] 1559; AVX-NEXT: retq 1560 %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0 1561 %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0 1562 %3 = icmp sgt <2 x i64> %1, %2 1563 %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 1564 ret <2 x i64> %4 1565} 1566 1567define <4 x i64> @max_gt_v4i64c() { 1568; SSE-LABEL: max_gt_v4i64c: 1569; SSE: # %bb.0: 1570; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,7] 1571; SSE-NEXT: pcmpeqd %xmm0, %xmm0 1572; SSE-NEXT: retq 1573; 1574; AVX-LABEL: max_gt_v4i64c: 1575; AVX: # %bb.0: 1576; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7] 1577; AVX-NEXT: retq 1578 %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0 1579 %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0 1580 %3 = icmp sgt <4 x i64> %1, %2 1581 %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2 1582 ret <4 x i64> %4 1583} 1584 1585define <4 x i32> @max_gt_v4i32c() { 1586; SSE-LABEL: max_gt_v4i32c: 1587; SSE: # %bb.0: 1588; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] 1589; SSE-NEXT: retq 1590; 1591; AVX-LABEL: max_gt_v4i32c: 1592; AVX: # %bb.0: 1593; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7] 1594; AVX-NEXT: retq 1595 %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0 1596 %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0 1597 %3 = icmp sgt <4 x i32> %1, %2 1598 %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 1599 ret <4 x i32> %4 1600} 1601 1602define <8 x i32> @max_gt_v8i32c() { 1603; SSE-LABEL: max_gt_v8i32c: 1604; SSE: # %bb.0: 1605; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295] 1606; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7] 1607; SSE-NEXT: retq 1608; 1609; AVX-LABEL: max_gt_v8i32c: 1610; AVX: # %bb.0: 1611; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7] 1612; AVX-NEXT: retq 1613 %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0 1614 %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0 1615 %3 = icmp sgt <8 x i32> %1, %2 1616 %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2 1617 ret <8 x i32> %4 1618} 1619 1620define <8 x i16> @max_gt_v8i16c() { 1621; SSE-LABEL: max_gt_v8i16c: 1622; SSE: # %bb.0: 1623; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] 1624; SSE-NEXT: retq 1625; 1626; AVX-LABEL: max_gt_v8i16c: 1627; AVX: # %bb.0: 1628; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7] 1629; AVX-NEXT: retq 1630 %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0 1631 %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0 1632 %3 = icmp sgt <8 x i16> %1, %2 1633 %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 1634 ret <8 x i16> %4 1635} 1636 1637define <16 x i16> @max_gt_v16i16c() { 1638; SSE-LABEL: max_gt_v16i16c: 1639; SSE: # %bb.0: 1640; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0] 1641; SSE-NEXT: movaps {{.*#+}} 
xmm1 = [7,6,5,4,5,6,7,8]
; SSE-NEXT: retq
;
; AVX-LABEL: max_gt_v16i16c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
; AVX-NEXT: retq
  %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
  %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
  %3 = icmp sgt <16 x i16> %1, %2
  %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
  ret <16 x i16> %4
}

define <16 x i8> @max_gt_v16i8c() {
; SSE-LABEL: max_gt_v16i8c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; SSE-NEXT: retq
;
; AVX-LABEL: max_gt_v16i8c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; AVX-NEXT: retq
  %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
  %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
  %3 = icmp sgt <16 x i8> %1, %2
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
  ret <16 x i8> %4
}

define <2 x i64> @max_ge_v2i64c() {
; SSE-LABEL: max_ge_v2i64c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551615,7]
; SSE-NEXT: retq
;
; AVX-LABEL: max_ge_v2i64c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551615,7]
; AVX-NEXT: retq
  %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
  %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
  %3 = icmp sge <2 x i64> %1, %2
  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
  ret <2 x i64> %4
}

define <4 x i64> @max_ge_v4i64c() {
; SSE-LABEL: max_ge_v4i64c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,7]
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: max_ge_v4i64c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,7,7]
; AVX-NEXT: retq
  %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
  %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
  %3 = icmp sge <4 x i64> %1, %2
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  ret <4 x i64> %4
}

define <4 x i32> @max_ge_v4i32c() {
; SSE-LABEL: max_ge_v4i32c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; SSE-NEXT: retq
;
; AVX-LABEL: max_ge_v4i32c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,4294967295,7,7]
; AVX-NEXT: retq
  %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
  %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
  %3 = icmp sge <4 x i32> %1, %2
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  ret <4 x i32> %4
}

define <8 x i32> @max_ge_v8i32c() {
; SSE-LABEL: max_ge_v8i32c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967293,4294967293,4294967295]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,5,5,7]
; SSE-NEXT: retq
;
; AVX-LABEL: max_ge_v8i32c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967295,4294967293,4294967293,4294967295,7,5,5,7]
; AVX-NEXT: retq
  %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
  %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
  %3 = icmp sge <8 x i32> %1, %2
  %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
  ret <8 x i32> %4
}

define <8 x i16> @max_ge_v8i16c() {
; SSE-LABEL: max_ge_v8i16c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; SSE-NEXT: retq
;
; AVX-LABEL: max_ge_v8i16c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65535,65533,65533,65535,7,5,5,7]
; AVX-NEXT: retq
  %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
  %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
  %3 = icmp sge <8 x i16> %1, %2
  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
  ret <8 x i16> %4
}

define <16 x i16> @max_ge_v16i16c() {
; SSE-LABEL: max_ge_v16i16c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [65535,65534,65533,65532,65533,65534,65535,0]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [7,6,5,4,5,6,7,8]
; SSE-NEXT: retq
;
; AVX-LABEL: max_ge_v16i16c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65535,65534,65533,65532,65533,65534,65535,0,7,6,5,4,5,6,7,8]
; AVX-NEXT: retq
  %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
  %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
  %3 = icmp sge <16 x i16> %1, %2
  %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
  ret <16 x i16> %4
}

define <16 x i8> @max_ge_v16i8c() {
; SSE-LABEL: max_ge_v16i8c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; SSE-NEXT: retq
;
; AVX-LABEL: max_ge_v16i8c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [255,254,253,252,253,254,255,0,7,6,5,4,5,6,7,8]
; AVX-NEXT: retq
  %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
  %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
  %3 = icmp sge <16 x i8> %1, %2
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
  ret <16 x i8> %4
}

define <2 x i64> @min_lt_v2i64c() {
; SSE-LABEL: min_lt_v2i64c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,1]
; SSE-NEXT: retq
;
; AVX-LABEL: min_lt_v2i64c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
; AVX-NEXT: retq
  %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
  %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
  %3 = icmp slt <2 x i64> %1, %2
  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
  ret <2 x i64> %4
}

define <4 x i64> @min_lt_v4i64c() {
; SSE-LABEL: min_lt_v4i64c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: min_lt_v4i64c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
; AVX-NEXT: retq
  %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
  %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
  %3 = icmp slt <4 x i64> %1, %2
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  ret <4 x i64> %4
}

define <4 x i32> @min_lt_v4i32c() {
; SSE-LABEL: min_lt_v4i32c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: min_lt_v4i32c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; AVX-NEXT: retq
  %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
  %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
  %3 = icmp slt <4 x i32> %1, %2
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  ret <4 x i32> %4
}

define <8 x i32> @min_lt_v8i32c() {
; SSE-LABEL: min_lt_v8i32c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE-NEXT: retq
;
; AVX-LABEL: min_lt_v8i32c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
; AVX-NEXT: retq
  %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
  %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
  %3 = icmp slt <8 x i32> %1, %2
  %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
  ret <8 x i32> %4
}

define <8 x i16> @min_lt_v8i16c() {
; SSE-LABEL: min_lt_v8i16c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; SSE-NEXT: retq
;
; AVX-LABEL: min_lt_v8i16c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; AVX-NEXT: retq
  %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
  %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
  %3 = icmp slt <8 x i16> %1, %2
  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
  ret <8 x i16> %4
}

define <16 x i16> @min_lt_v16i16c() {
; SSE-LABEL: min_lt_v16i16c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: min_lt_v16i16c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
; AVX-NEXT: retq
  %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
  %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
  %3 = icmp slt <16 x i16> %1, %2
  %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
  ret <16 x i16> %4
}

define <16 x i8> @min_lt_v16i8c() {
; SSE-LABEL: min_lt_v16i8c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: min_lt_v16i8c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; AVX-NEXT: retq
  %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
  %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
  %3 = icmp slt <16 x i8> %1, %2
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
  ret <16 x i8> %4
}

define <2 x i64> @min_le_v2i64c() {
; SSE-LABEL: min_le_v2i64c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,1]
; SSE-NEXT: retq
;
; AVX-LABEL: min_le_v2i64c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [18446744073709551609,1]
; AVX-NEXT: retq
  %1 = insertelement <2 x i64> <i64 -7, i64 7>, i64 -7, i32 0
  %2 = insertelement <2 x i64> <i64 -1, i64 1>, i64 -1, i32 0
  %3 = icmp sle <2 x i64> %1, %2
  %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
  ret <2 x i64> %4
}

define <4 x i64> @min_le_v4i64c() {
; SSE-LABEL: min_le_v4i64c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [18446744073709551609,18446744073709551609]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: min_le_v4i64c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [18446744073709551609,18446744073709551609,1,1]
; AVX-NEXT: retq
  %1 = insertelement <4 x i64> <i64 -7, i64 -1, i64 1, i64 7>, i64 -7, i32 0
  %2 = insertelement <4 x i64> <i64 -1, i64 -7, i64 7, i64 1>, i64 -1, i32 0
  %3 = icmp sle <4 x i64> %1, %2
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  ret <4 x i64> %4
}

define <4 x i32> @min_le_v4i32c() {
; SSE-LABEL: min_le_v4i32c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: min_le_v4i32c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967289,4294967289,1,1]
; AVX-NEXT: retq
  %1 = insertelement <4 x i32> <i32 -7, i32 -1, i32 1, i32 7>, i32 -7, i32 0
  %2 = insertelement <4 x i32> <i32 -1, i32 -7, i32 7, i32 1>, i32 -1, i32 0
  %3 = icmp sle <4 x i32> %1, %2
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  ret <4 x i32> %4
}

define <8 x i32> @min_le_v8i32c() {
; SSE-LABEL: min_le_v8i32c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967289,4294967291,4294967291,4294967289]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,3,3,1]
; SSE-NEXT: retq
;
; AVX-LABEL: min_le_v8i32c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [4294967289,4294967291,4294967291,4294967289,1,3,3,1]
; AVX-NEXT: retq
  %1 = insertelement <8 x i32> <i32 -7, i32 -5, i32 -3, i32 -1, i32 1, i32 3, i32 5, i32 7>, i32 -7, i32 0
  %2 = insertelement <8 x i32> <i32 -1, i32 -3, i32 -5, i32 -7, i32 7, i32 5, i32 3, i32 1>, i32 -1, i32 0
  %3 = icmp sle <8 x i32> %1, %2
  %4 = select <8 x i1> %3, <8 x i32> %1, <8 x i32> %2
  ret <8 x i32> %4
}

define <8 x i16> @min_le_v8i16c() {
; SSE-LABEL: min_le_v8i16c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; SSE-NEXT: retq
;
; AVX-LABEL: min_le_v8i16c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [65529,65531,65531,65529,1,3,3,1]
; AVX-NEXT: retq
  %1 = insertelement <8 x i16> <i16 -7, i16 -5, i16 -3, i16 -1, i16 1, i16 3, i16 5, i16 7>, i16 -7, i32 0
  %2 = insertelement <8 x i16> <i16 -1, i16 -3, i16 -5, i16 -7, i16 7, i16 5, i16 3, i16 1>, i16 -1, i32 0
  %3 = icmp sle <8 x i16> %1, %2
  %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
  ret <8 x i16> %4
}

define <16 x i16> @min_le_v16i16c() {
; SSE-LABEL: min_le_v16i16c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [65529,65530,65531,65532,65531,65530,65529,0]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1,2,3,4,3,2,1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: min_le_v16i16c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [65529,65530,65531,65532,65531,65530,65529,0,1,2,3,4,3,2,1,0]
; AVX-NEXT: retq
  %1 = insertelement <16 x i16> <i16 -7, i16 -6, i16 -5, i16 -4, i16 -3, i16 -2, i16 -1, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, i16 -7, i32 0
  %2 = insertelement <16 x i16> <i16 -1, i16 -2, i16 -3, i16 -4, i16 -5, i16 -6, i16 -7, i16 0, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 -1, i32 0
  %3 = icmp sle <16 x i16> %1, %2
  %4 = select <16 x i1> %3, <16 x i16> %1, <16 x i16> %2
  ret <16 x i16> %4
}

define <16 x i8> @min_le_v16i8c() {
; SSE-LABEL: min_le_v16i8c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: min_le_v16i8c:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [249,250,251,252,251,250,249,0,1,2,3,4,3,2,1,0]
; AVX-NEXT: retq
  %1 = insertelement <16 x i8> <i8 -7, i8 -6, i8 -5, i8 -4, i8 -3, i8 -2, i8 -1, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, i8 -7, i32 0
  %2 = insertelement <16 x i8> <i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 0, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, i8 -1, i32 0
  %3 = icmp sle <16 x i8> %1, %2
  %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
  ret <16 x i8> %4
}