; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefix=AVX512BW

; Every function below compares vectors of at most 128 bits, producing a vector
; of i1, and then bitcasts that i1 vector to a scalar integer which is either
; returned or stored.

; Signed greater-than on <8 x i16>; the <8 x i1> result is returned as i8.
define i8 @v8i16(<8 x i16> %a, <8 x i16> %b) {
; SSE2-SSSE3-LABEL: v8i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v8i16:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512F-NEXT:    vptestmd %ymm0, %ymm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %cmp = icmp sgt <8 x i16> %a, %b
  %mask = bitcast <8 x i1> %cmp to i8
  ret i8 %mask
}

; Signed greater-than on <4 x i32>; the <4 x i1> result is returned as i4.
define i4 @v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE2-SSSE3-LABEL: v4i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v4i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vmovmskps %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v4i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %cmp = icmp sgt <4 x i32> %a, %b
  %mask = bitcast <4 x i1> %cmp to i4
  ret i4 %mask
}

; Ordered greater-than on <4 x float>; the <4 x i1> result is returned as i4.
define i4 @v4f32(<4 x float> %a, <4 x float> %b) {
; SSE2-SSSE3-LABEL: v4f32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    cmpltps %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    movmskps %xmm1, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v4f32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    vmovmskps %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v4f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltps %xmm0, %xmm1, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v4f32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltps %xmm0, %xmm1, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %cmp = fcmp ogt <4 x float> %a, %b
  %mask = bitcast <4 x i1> %cmp to i4
  ret i4 %mask
}

; Signed greater-than on <16 x i8>; the <16 x i1> result is returned as i16.
define i16 @v16i8(<16 x i8> %a, <16 x i8> %b) {
; SSE2-SSSE3-LABEL: v16i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v16i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovmskb %xmm0, %eax
; AVX512F-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    retq
  %cmp = icmp sgt <16 x i8> %a, %b
  %mask = bitcast <16 x i1> %cmp to i16
  ret i16 %mask
}

; Signed greater-than on <2 x i8>; the <2 x i1> result is returned as i2.
define i2 @v2i8(<2 x i8> %a, <2 x i8> %b) {
; SSE2-LABEL: v2i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movmskpd %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: v2i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pcmpgtb %xmm1, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,0,u,u,u,u,u,u,u,1]
; SSSE3-NEXT:    movmskpd %xmm0, %eax
; SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSSE3-NEXT:    retq
;
; AVX12-LABEL: v2i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpmovsxbq %xmm0, %xmm0
; AVX12-NEXT:    vmovmskpd %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v2i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v2i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %cmp = icmp sgt <2 x i8> %a, %b
  %mask = bitcast <2 x i1> %cmp to i2
  ret i2 %mask
}

; Signed greater-than on <2 x i16>; the <2 x i1> result is returned as i2.
define i2 @v2i16(<2 x i16> %a, <2 x i16> %b) {
; SSE2-SSSE3-LABEL: v2i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movmskpd %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v2i16:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpmovsxwq %xmm0, %xmm0
; AVX12-NEXT:    vmovmskpd %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v2i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512F-NEXT:    vptestmd %ymm0, %ymm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v2i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %cmp = icmp sgt <2 x i16> %a, %b
  %mask = bitcast <2 x i1> %cmp to i2
  ret i2 %mask
}

; Signed greater-than on <2 x i32>; the <2 x i1> result is returned as i2.
define i2 @v2i32(<2 x i32> %a, <2 x i32> %b) {
; SSE2-SSSE3-LABEL: v2i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movmskpd %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v2i32:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX12-NEXT:    vmovmskpd %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v2i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %cmp = icmp sgt <2 x i32> %a, %b
  %mask = bitcast <2 x i1> %cmp to i2
  ret i2 %mask
}

; Signed greater-than on <2 x i64>; the <2 x i1> result is returned as i2.
define i2 @v2i64(<2 x i64> %a, <2 x i64> %b) {
; SSE2-SSSE3-LABEL: v2i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pxor %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm0, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    por %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    movmskpd %xmm1, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v2i64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vmovmskpd %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v2i64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %cmp = icmp sgt <2 x i64> %a, %b
  %mask = bitcast <2 x i1> %cmp to i2
  ret i2 %mask
}

; Ordered greater-than on <2 x double>; the <2 x i1> result is returned as i2.
define i2 @v2f64(<2 x double> %a, <2 x double> %b) {
; SSE2-SSSE3-LABEL: v2f64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    cmpltpd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    movmskpd %xmm1, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v2f64:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
; AVX12-NEXT:    vmovmskpd %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v2f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v2f64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %cmp = fcmp ogt <2 x double> %a, %b
  %mask = bitcast <2 x i1> %cmp to i2
  ret i2 %mask
}

; Signed greater-than on <4 x i8>; the <4 x i1> result is returned as i4.
define i4 @v4i8(<4 x i8> %a, <4 x i8> %b) {
; SSE2-SSSE3-LABEL: v4i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v4i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX12-NEXT:    vmovmskps %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v4i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v4i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %cmp = icmp sgt <4 x i8> %a, %b
  %mask = bitcast <4 x i1> %cmp to i4
  ret i4 %mask
}

; Signed greater-than on <4 x i16>; the <4 x i1> result is returned as i4.
define i4 @v4i16(<4 x i16> %a, <4 x i16> %b) {
; SSE2-SSSE3-LABEL: v4i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v4i16:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX12-NEXT:    vmovmskps %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v4i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512F-NEXT:    vptestmd %ymm0, %ymm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v4i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %cmp = icmp sgt <4 x i16> %a, %b
  %mask = bitcast <4 x i1> %cmp to i4
  ret i4 %mask
}

; Signed greater-than on <8 x i8>; the <8 x i1> result is returned as i8.
define i8 @v8i8(<8 x i8> %a, <8 x i8> %b) {
; SSE2-SSSE3-LABEL: v8i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pcmpgtb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: v8i8:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: v8i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v8i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
  %cmp = icmp sgt <8 x i8> %a, %b
  %mask = bitcast <8 x i1> %cmp to i8
  ret i8 %mask
}

; Equality compare on <16 x i8>. The 16 result bits fill lanes 0-15 of a
; <64 x i1>; lanes 16-63 all select from the zeroinitializer shuffle operand
; (mask indices 16-31). The widened vector is returned as i64.
define i64 @v16i8_widened_with_zeroes(<16 x i8> %a, <16 x i8> %b) {
; SSE2-SSSE3-LABEL: v16i8_widened_with_zeroes:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v16i8_widened_with_zeroes:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v16i8_widened_with_zeroes:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v16i8_widened_with_zeroes:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16i8_widened_with_zeroes:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kmovq %k0, %rax
; AVX512BW-NEXT:    retq
entry:
  %eq = icmp eq <16 x i8> %a, %b
  %wide = shufflevector <16 x i1> %eq, <16 x i1> zeroinitializer, <64 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
      i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31,
      i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31,
      i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Equality compare on <16 x i8>. The 16 result bits fill lanes 0-15 of a
; <64 x i1>; lanes 16-63 all select from the all-ones (i1 -1) shuffle operand
; (mask indices 16-31). The widened vector is returned as i64.
define i64 @v16i8_widened_with_ones(<16 x i8> %a, <16 x i8> %b) {
; SSE2-SSSE3-LABEL: v16i8_widened_with_ones:
; SSE2-SSSE3:       # %bb.0: # %entry
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %ecx
; SSE2-SSSE3-NEXT:    orl $-65536, %ecx # imm = 0xFFFF0000
; SSE2-SSSE3-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; SSE2-SSSE3-NEXT:    orq %rcx, %rax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: v16i8_widened_with_ones:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    orl $-65536, %ecx # imm = 0xFFFF0000
; AVX1-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; AVX1-NEXT:    orq %rcx, %rax
; AVX1-NEXT:    retq
;
; AVX2-LABEL: v16i8_widened_with_ones:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; AVX2-NEXT:    orq %rcx, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: v16i8_widened_with_ones:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, %ecx
; AVX512F-NEXT:    orl $-65536, %ecx # imm = 0xFFFF0000
; AVX512F-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
; AVX512F-NEXT:    orq %rcx, %rax
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: v16i8_widened_with_ones:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; AVX512BW-NEXT:    kxnorw %k0, %k0, %k1
; AVX512BW-NEXT:    kunpckwd %k0, %k1, %k0
; AVX512BW-NEXT:    kxnord %k0, %k0, %k1
; AVX512BW-NEXT:    kunpckdq %k0, %k1, %k0
; AVX512BW-NEXT:    kmovq %k0, %rax
; AVX512BW-NEXT:    retq
entry:
  %eq = icmp eq <16 x i8> %a, %b
  %wide = shufflevector <16 x i1> %eq, <16 x i1> <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>, <64 x i32> <
      i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
      i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31,
      i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31,
      i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Signed less-than-zero test on <16 x i8>; the <16 x i1> result is bitcast to
; i16 and stored through %p.
define void @bitcast_16i8_store(i16* %p, <16 x i8> %a0) {
; SSE2-SSSE3-LABEL: bitcast_16i8_store:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movw %ax, (%rdi)
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_16i8_store:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    movw %ax, (%rdi)
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: bitcast_16i8_store:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kmovw %k0, (%rdi)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: bitcast_16i8_store:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovb2m %xmm0, %k0
; AVX512BW-NEXT:    kmovw %k0, (%rdi)
; AVX512BW-NEXT:    retq
  %sign = icmp slt <16 x i8> %a0, zeroinitializer
  %bits = bitcast <16 x i1> %sign to i16
  store i16 %bits, i16* %p
  ret void
}

; Signed less-than-zero test on <8 x i16>; the <8 x i1> result is bitcast to
; i8 and stored through %p.
define void @bitcast_8i16_store(i8* %p, <8 x i16> %a0) {
; SSE2-SSSE3-LABEL: bitcast_8i16_store:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movb %al, (%rdi)
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_8i16_store:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    movb %al, (%rdi)
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: bitcast_8i16_store:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX512F-NEXT:    vptestmd %ymm0, %ymm0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movb %al, (%rdi)
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: bitcast_8i16_store:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovw2m %xmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movb %al, (%rdi)
; AVX512BW-NEXT:    retq
  %sign = icmp slt <8 x i16> %a0, zeroinitializer
  %bits = bitcast <8 x i1> %sign to i8
  store i8 %bits, i8* %p
  ret void
}

; Signed less-than-zero test on <4 x i32>; the <4 x i1> result is bitcast to
; i4 and stored through %p.
define void @bitcast_4i32_store(i4* %p, <4 x i32> %a0) {
; SSE2-SSSE3-LABEL: bitcast_4i32_store:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    movb %al, (%rdi)
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_4i32_store:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskps %xmm0, %eax
; AVX12-NEXT:    movb %al, (%rdi)
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: bitcast_4i32_store:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movb %al, (%rdi)
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: bitcast_4i32_store:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT:    vpcmpgtd %xmm0, %xmm1, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movb %al, (%rdi)
; AVX512BW-NEXT:    retq
  %sign = icmp slt <4 x i32> %a0, zeroinitializer
  %bits = bitcast <4 x i1> %sign to i4
  store i4 %bits, i4* %p
  ret void
}

; Signed less-than-zero test on <2 x i64>; the <2 x i1> result is bitcast to
; i2 and stored through %p.
define void @bitcast_2i64_store(i2* %p, <2 x i64> %a0) {
; SSE2-SSSE3-LABEL: bitcast_2i64_store:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movmskpd %xmm0, %eax
; SSE2-SSSE3-NEXT:    movb %al, (%rdi)
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_2i64_store:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskpd %xmm0, %eax
; AVX12-NEXT:    movb %al, (%rdi)
; AVX12-NEXT:    retq
;
; AVX512F-LABEL: bitcast_2i64_store:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movb %al, (%rdi)
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: bitcast_2i64_store:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movb %al, (%rdi)
; AVX512BW-NEXT:    retq
  %sign = icmp slt <2 x i64> %a0, zeroinitializer
  %bits = bitcast <2 x i1> %sign to i2
  store i2 %bits, i2* %p
  ret void
}