; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s

; Exercises llc's x86-64 SSE2 code generation for the vector bit-counting
; intrinsics (cttz, ctlz, ctpop) on <2 x i64> and <2 x i32> operands.
; The expected assembly in each function is machine-generated (see the NOTE
; line at the top); regenerate it with that script instead of hand-editing.

; Intrinsic declarations used by the <2 x i64> tests.
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)

; cttz on <2 x i64>, called with the i1 flag set to true (the LLVM LangRef
; defines what that flag means for a zero input).
; Expected code: pcmpeqd/paddq/pandn -- presumably forming ~a & (a - 1) --
; followed by exactly the instruction sequence expected for @foopop.
define <2 x i64> @footz(<2 x i64> %a) nounwind {
; CHECK-LABEL: footz:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: paddq %xmm0, %xmm1
; CHECK-NEXT: pandn %xmm1, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $1, %xmm1
; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
; CHECK-NEXT: psubb %xmm1, %xmm0
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: pand %xmm1, %xmm2
; CHECK-NEXT: psrlw $2, %xmm0
; CHECK-NEXT: pand %xmm1, %xmm0
; CHECK-NEXT: paddb %xmm2, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $4, %xmm1
; CHECK-NEXT: paddb %xmm0, %xmm1
; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
; CHECK-NEXT: pxor %xmm0, %xmm0
; CHECK-NEXT: psadbw %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
  %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
  ret <2 x i64> %c

}
; ctlz on <2 x i64>, called with the i1 flag set to true.
; Expected code: six psrlq/por steps (shift amounts 1, 2, 4, 8, 16, 32), then
; pcmpeqd/pxor (presumably a bitwise NOT), then the same shift/mask/add/psadbw
; opcode sequence as @foopop, using a different register assignment.
define <2 x i64> @foolz(<2 x i64> %a) nounwind {
; CHECK-LABEL: foolz:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlq $1, %xmm1
; CHECK-NEXT: por %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: psrlq $2, %xmm0
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlq $4, %xmm1
; CHECK-NEXT: por %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: psrlq $8, %xmm0
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlq $16, %xmm1
; CHECK-NEXT: por %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: psrlq $32, %xmm0
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pxor %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: psrlw $1, %xmm0
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: psubb %xmm0, %xmm1
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; CHECK-NEXT: movdqa %xmm1, %xmm2
; CHECK-NEXT: pand %xmm0, %xmm2
; CHECK-NEXT: psrlw $2, %xmm1
; CHECK-NEXT: pand %xmm0, %xmm1
; CHECK-NEXT: paddb %xmm2, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm2
; CHECK-NEXT: psrlw $4, %xmm2
; CHECK-NEXT: paddb %xmm1, %xmm2
; CHECK-NEXT: pand {{.*}}(%rip), %xmm2
; CHECK-NEXT: pxor %xmm0, %xmm0
; CHECK-NEXT: psadbw %xmm2, %xmm0
; CHECK-NEXT: retq
  %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
  ret <2 x i64> %c

}

; ctpop on <2 x i64>.
; Expected code: byte-wise shift/mask/add steps (psrlw by 1, 2 and 4 combined
; with pand/psubb/paddb; the 51 = 0x33 mask is pinned, the other two masks are
; constant-pool loads whose values the patterns do not pin), finishing with
; psadbw against a zeroed register.
define <2 x i64> @foopop(<2 x i64> %a) nounwind {
; CHECK-LABEL: foopop:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $1, %xmm1
; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
; CHECK-NEXT: psubb %xmm1, %xmm0
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: pand %xmm1, %xmm2
; CHECK-NEXT: psrlw $2, %xmm0
; CHECK-NEXT: pand %xmm1, %xmm0
; CHECK-NEXT: paddb %xmm2, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $4, %xmm1
; CHECK-NEXT: paddb %xmm0, %xmm1
; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
; CHECK-NEXT: pxor %xmm0, %xmm0
; CHECK-NEXT: psadbw %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
  %c = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  ret <2 x i64> %c
}

; Intrinsic declarations used by the <2 x i32> tests.
; NOTE(review): the "prom" prefix on the functions below presumably refers to
; promotion/legalization of the <2 x i32> type -- confirm against the commit
; that introduced this test.
declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)

; cttz on <2 x i32>, called with the i1 flag set to false (unlike @footz).
; Expected code: pcmpeqd/paddd/pandn followed by exactly the instruction
; sequence expected for @prompop.
define <2 x i32> @promtz(<2 x i32> %a) nounwind {
; CHECK-LABEL: promtz:
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: paddd %xmm0, %xmm1
; CHECK-NEXT: pandn %xmm1, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $1, %xmm1
; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
; CHECK-NEXT: psubb %xmm1, %xmm0
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: pand %xmm1, %xmm2
; CHECK-NEXT: psrlw $2, %xmm0
; CHECK-NEXT: pand %xmm1, %xmm0
; CHECK-NEXT: paddb %xmm2, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $4, %xmm1
; CHECK-NEXT: paddb %xmm0, %xmm1
; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
; CHECK-NEXT: pxor %xmm0, %xmm0
; CHECK-NEXT: movdqa %xmm1, %xmm2
; CHECK-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; CHECK-NEXT: psadbw %xmm0, %xmm2
; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: psadbw %xmm0, %xmm1
; CHECK-NEXT: packuswb %xmm2, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
  %c = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
  ret <2 x i32> %c

}
; ctlz on <2 x i32>, called with the i1 flag set to false (unlike @foolz).
; Expected code: five psrld/por steps (shift amounts 1, 2, 4, 8, 16), then
; pcmpeqd/pxor, then the same opcode sequence as @prompop (including the
; punpckhdq/punpckldq + psadbw + packuswb tail) in different registers.
define <2 x i32> @promlz(<2 x i32> %a) nounwind {
; CHECK-LABEL: promlz:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrld $1, %xmm1
; CHECK-NEXT: por %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: psrld $2, %xmm0
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrld $4, %xmm1
; CHECK-NEXT: por %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: psrld $8, %xmm0
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrld $16, %xmm1
; CHECK-NEXT: por %xmm0, %xmm1
; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
; CHECK-NEXT: pxor %xmm1, %xmm2
; CHECK-NEXT: movdqa %xmm2, %xmm0
; CHECK-NEXT: psrlw $1, %xmm0
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: psubb %xmm0, %xmm2
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; CHECK-NEXT: movdqa %xmm2, %xmm1
; CHECK-NEXT: pand %xmm0, %xmm1
; CHECK-NEXT: psrlw $2, %xmm2
; CHECK-NEXT: pand %xmm0, %xmm2
; CHECK-NEXT: paddb %xmm1, %xmm2
; CHECK-NEXT: movdqa %xmm2, %xmm0
; CHECK-NEXT: psrlw $4, %xmm0
; CHECK-NEXT: paddb %xmm2, %xmm0
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; CHECK-NEXT: psadbw %xmm1, %xmm2
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: psadbw %xmm1, %xmm0
; CHECK-NEXT: packuswb %xmm2, %xmm0
; CHECK-NEXT: retq
  %c = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
  ret <2 x i32> %c

}

; ctpop on <2 x i32>.
; Expected code: the same byte-wise shift/mask/add steps as @foopop, but the
; tail differs: the result is interleaved with a zeroed register via
; punpckhdq and punpckldq, each half goes through psadbw, and the two halves
; are recombined with packuswb.
define <2 x i32> @prompop(<2 x i32> %a) nounwind {
; CHECK-LABEL: prompop:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $1, %xmm1
; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
; CHECK-NEXT: psubb %xmm1, %xmm0
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: pand %xmm1, %xmm2
; CHECK-NEXT: psrlw $2, %xmm0
; CHECK-NEXT: pand %xmm1, %xmm0
; CHECK-NEXT: paddb %xmm2, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $4, %xmm1
; CHECK-NEXT: paddb %xmm0, %xmm1
; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
; CHECK-NEXT: pxor %xmm0, %xmm0
; CHECK-NEXT: movdqa %xmm1, %xmm2
; CHECK-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; CHECK-NEXT: psadbw %xmm0, %xmm2
; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: psadbw %xmm0, %xmm1
; CHECK-NEXT: packuswb %xmm2, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
  %c = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  ret <2 x i32> %c
}