1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefix=CHECK-SSE2 3; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefix=CHECK-SSE41 4; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefix=CHECK-AVX1 5; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefix=CHECK-AVX2 6; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefix=CHECK-AVX512VL 7 8; Odd+Even divisors 9define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind { 10; CHECK-SSE2-LABEL: test_srem_odd_even: 11; CHECK-SSE2: # %bb.0: 12; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1717986919,2454267027,1374389535,1374389535] 13; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 14; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm1 15; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3] 16; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 17; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] 18; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm4 19; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,3,2,3] 20; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 21; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 22; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 23; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 24; CHECK-SSE2-NEXT: pand %xmm3, %xmm4 25; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [0,4294967295,0,0] 26; CHECK-SSE2-NEXT: pand %xmm0, %xmm3 27; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4 28; CHECK-SSE2-NEXT: psubd %xmm4, %xmm2 29; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2 30; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 31; CHECK-SSE2-NEXT: psrad $5, %xmm3 32; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4 33; CHECK-SSE2-NEXT: psrad $3, %xmm4 34; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm5 35; CHECK-SSE2-NEXT: psrad $1, %xmm5 36; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0] 37; 
CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1] 38; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3] 39; CHECK-SSE2-NEXT: psrld $31, %xmm2 40; CHECK-SSE2-NEXT: paddd %xmm5, %xmm2 41; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,14,25,100] 42; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 43; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2 44; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 45; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 46; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3 47; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 48; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 49; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 50; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 51; CHECK-SSE2-NEXT: psrld $31, %xmm0 52; CHECK-SSE2-NEXT: retq 53; 54; CHECK-SSE41-LABEL: test_srem_odd_even: 55; CHECK-SSE41: # %bb.0: 56; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,2454267027,1374389535,1374389535] 57; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 58; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 59; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3 60; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1 61; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 62; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 63; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 64; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3 65; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm0[2,3],xmm3[4,5,6,7] 66; CHECK-SSE41-NEXT: paddd %xmm1, %xmm3 67; CHECK-SSE41-NEXT: movdqa %xmm3, %xmm1 68; CHECK-SSE41-NEXT: psrad $5, %xmm1 69; CHECK-SSE41-NEXT: movdqa %xmm3, %xmm4 70; CHECK-SSE41-NEXT: psrad $3, %xmm4 71; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm4[0,1,2,3],xmm1[4,5,6,7] 72; CHECK-SSE41-NEXT: movdqa %xmm3, %xmm5 73; CHECK-SSE41-NEXT: psrad $1, %xmm5 74; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm4[4,5,6,7] 75; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1],xmm1[2,3],xmm5[4,5],xmm1[6,7] 76; 
CHECK-SSE41-NEXT: psrld $31, %xmm3 77; CHECK-SSE41-NEXT: paddd %xmm5, %xmm3 78; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm3 79; CHECK-SSE41-NEXT: psubd %xmm3, %xmm0 80; CHECK-SSE41-NEXT: pcmpeqd %xmm2, %xmm0 81; CHECK-SSE41-NEXT: psrld $31, %xmm0 82; CHECK-SSE41-NEXT: retq 83; 84; CHECK-AVX1-LABEL: test_srem_odd_even: 85; CHECK-AVX1: # %bb.0: 86; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,1374389535,1374389535] 87; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 88; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 89; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 90; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 91; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 92; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 93; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 94; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm0[2,3],xmm2[4,5,6,7] 95; CHECK-AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1 96; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm3 97; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm4 98; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7] 99; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm5 100; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7] 101; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7] 102; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 103; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm3, %xmm1 104; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 105; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 106; CHECK-AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 107; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 108; CHECK-AVX1-NEXT: retq 109; 110; CHECK-AVX2-LABEL: test_srem_odd_even: 111; CHECK-AVX2: # %bb.0: 112; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,1374389535,1374389535] 113; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 114; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 115; CHECK-AVX2-NEXT: vpmuldq %xmm2, 
%xmm3, %xmm2 116; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 117; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 118; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 119; CHECK-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 120; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm2[0],xmm0[1],xmm2[2,3] 121; CHECK-AVX2-NEXT: vpaddd %xmm3, %xmm1, %xmm1 122; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm3 123; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 124; CHECK-AVX2-NEXT: vpaddd %xmm3, %xmm1, %xmm1 125; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 126; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 127; CHECK-AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 128; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 129; CHECK-AVX2-NEXT: retq 130; 131; CHECK-AVX512VL-LABEL: test_srem_odd_even: 132; CHECK-AVX512VL: # %bb.0: 133; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 134; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 135; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 136; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 137; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 138; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 139; CHECK-AVX512VL-NEXT: retq 140 %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 25, i32 100> 141 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 142 %ret = zext <4 x i1> %cmp to <4 x i32> 143 ret <4 x i32> %ret 144} 145 146;==============================================================================; 147 148; One all-ones divisor in odd divisor 149define <4 x i32> @test_srem_odd_allones_eq(<4 x i32> %X) nounwind { 150; CHECK-SSE2-LABEL: test_srem_odd_allones_eq: 151; CHECK-SSE2: # %bb.0: 152; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837] 153; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 154; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0 155; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 156; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 157; 
CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] 158; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 159; CHECK-SSE2-NEXT: paddd {{.*}}(%rip), %xmm0 160; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm0 161; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 162; CHECK-SSE2-NEXT: pandn {{.*}}(%rip), %xmm0 163; CHECK-SSE2-NEXT: retq 164; 165; CHECK-SSE41-LABEL: test_srem_odd_allones_eq: 166; CHECK-SSE41: # %bb.0: 167; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0 168; CHECK-SSE41-NEXT: paddd {{.*}}(%rip), %xmm0 169; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458] 170; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 171; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 172; CHECK-SSE41-NEXT: psrld $31, %xmm0 173; CHECK-SSE41-NEXT: retq 174; 175; CHECK-AVX1-LABEL: test_srem_odd_allones_eq: 176; CHECK-AVX1: # %bb.0: 177; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 178; CHECK-AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 179; CHECK-AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 180; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 181; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 182; CHECK-AVX1-NEXT: retq 183; 184; CHECK-AVX2-LABEL: test_srem_odd_allones_eq: 185; CHECK-AVX2: # %bb.0: 186; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837] 187; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 188; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729] 189; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 190; CHECK-AVX2-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 191; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 192; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 193; CHECK-AVX2-NEXT: retq 194; 195; CHECK-AVX512VL-LABEL: test_srem_odd_allones_eq: 196; CHECK-AVX512VL: # %bb.0: 197; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0 198; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 199; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 200; 
CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 201; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 202; CHECK-AVX512VL-NEXT: retq 203 %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5> 204 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 205 %ret = zext <4 x i1> %cmp to <4 x i32> 206 ret <4 x i32> %ret 207} 208define <4 x i32> @test_srem_odd_allones_ne(<4 x i32> %X) nounwind { 209; CHECK-SSE2-LABEL: test_srem_odd_allones_ne: 210; CHECK-SSE2: # %bb.0: 211; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837] 212; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 213; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0 214; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 215; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 216; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] 217; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 218; CHECK-SSE2-NEXT: paddd {{.*}}(%rip), %xmm0 219; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm0 220; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 221; CHECK-SSE2-NEXT: psrld $31, %xmm0 222; CHECK-SSE2-NEXT: retq 223; 224; CHECK-SSE41-LABEL: test_srem_odd_allones_ne: 225; CHECK-SSE41: # %bb.0: 226; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0 227; CHECK-SSE41-NEXT: paddd {{.*}}(%rip), %xmm0 228; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458] 229; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 230; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 231; CHECK-SSE41-NEXT: pandn {{.*}}(%rip), %xmm0 232; CHECK-SSE41-NEXT: retq 233; 234; CHECK-AVX1-LABEL: test_srem_odd_allones_ne: 235; CHECK-AVX1: # %bb.0: 236; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 237; CHECK-AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 238; CHECK-AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 239; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 240; CHECK-AVX1-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 241; CHECK-AVX1-NEXT: retq 242; 243; CHECK-AVX2-LABEL: test_srem_odd_allones_ne: 
244; CHECK-AVX2: # %bb.0: 245; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837] 246; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 247; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729] 248; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 249; CHECK-AVX2-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 250; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 251; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] 252; CHECK-AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0 253; CHECK-AVX2-NEXT: retq 254; 255; CHECK-AVX512VL-LABEL: test_srem_odd_allones_ne: 256; CHECK-AVX512VL: # %bb.0: 257; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0 258; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 259; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 260; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 261; CHECK-AVX512VL-NEXT: vpandnd {{.*}}(%rip){1to4}, %xmm0, %xmm0 262; CHECK-AVX512VL-NEXT: retq 263 %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5> 264 %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 265 %ret = zext <4 x i1> %cmp to <4 x i32> 266 ret <4 x i32> %ret 267} 268 269; One all-ones divisor in even divisor 270define <4 x i32> @test_srem_even_allones_eq(<4 x i32> %X) nounwind { 271; CHECK-SSE2-LABEL: test_srem_even_allones_eq: 272; CHECK-SSE2: # %bb.0: 273; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295,0,4294967295] 274; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3 275; CHECK-SSE2-NEXT: pand %xmm2, %xmm3 276; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 277; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 278; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 279; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2454267027,2454267027,0,2454267027] 280; CHECK-SSE2-NEXT: pand %xmm5, %xmm4 281; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4 282; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm5 283; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,3,2,3] 284; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = 
xmm0[1,1,3,3] 285; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2454267027,2454267027,2454267027,2454267027] 286; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm6 287; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,3,2,3] 288; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1] 289; CHECK-SSE2-NEXT: psubd %xmm4, %xmm3 290; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = <1,u,4294967295,u> 291; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm4 292; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] 293; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm5 294; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3] 295; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] 296; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4 297; CHECK-SSE2-NEXT: movdqa %xmm4, %xmm3 298; CHECK-SSE2-NEXT: psrad $3, %xmm3 299; CHECK-SSE2-NEXT: movdqa %xmm4, %xmm5 300; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[2,0],xmm3[3,0] 301; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm5[0,2] 302; CHECK-SSE2-NEXT: psrld $31, %xmm4 303; CHECK-SSE2-NEXT: pand %xmm2, %xmm4 304; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4 305; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 306; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm4 307; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3] 308; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2 309; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 310; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] 311; CHECK-SSE2-NEXT: psubd %xmm3, %xmm0 312; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 313; CHECK-SSE2-NEXT: psrld $31, %xmm0 314; CHECK-SSE2-NEXT: retq 315; 316; CHECK-SSE41-LABEL: test_srem_even_allones_eq: 317; CHECK-SSE41: # %bb.0: 318; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 319; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm1 320; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,0,0,0] 321; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2 322; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 323; CHECK-SSE41-NEXT: pblendw 
{{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 324; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,4294967295,1] 325; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm1 326; CHECK-SSE41-NEXT: paddd %xmm2, %xmm1 327; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2 328; CHECK-SSE41-NEXT: psrad $3, %xmm2 329; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7] 330; CHECK-SSE41-NEXT: psrld $31, %xmm1 331; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3 332; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7] 333; CHECK-SSE41-NEXT: paddd %xmm2, %xmm1 334; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1 335; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0 336; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0 337; CHECK-SSE41-NEXT: psrld $31, %xmm0 338; CHECK-SSE41-NEXT: retq 339; 340; CHECK-AVX1-LABEL: test_srem_even_allones_eq: 341; CHECK-AVX1: # %bb.0: 342; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 343; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm1, %xmm1 344; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2 345; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 346; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 347; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 348; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 349; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm2 350; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7] 351; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 352; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 353; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7] 354; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 355; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 356; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 357; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 358; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 359; CHECK-AVX1-NEXT: retq 360; 361; CHECK-AVX2-LABEL: test_srem_even_allones_eq: 362; CHECK-AVX2: # %bb.0: 363; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = 
xmm0[1,1,3,3] 364; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027] 365; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1 366; CHECK-AVX2-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2 367; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 368; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] 369; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 370; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 371; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2 372; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 373; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3] 374; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 375; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 376; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 377; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 378; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 379; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 380; CHECK-AVX2-NEXT: retq 381; 382; CHECK-AVX512VL-LABEL: test_srem_even_allones_eq: 383; CHECK-AVX512VL: # %bb.0: 384; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0 385; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 386; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0 387; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 388; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 389; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 390; CHECK-AVX512VL-NEXT: retq 391 %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14> 392 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 393 %ret = zext <4 x i1> %cmp to <4 x i32> 394 ret <4 x i32> %ret 395} 396define <4 x i32> @test_srem_even_allones_ne(<4 x i32> %X) nounwind { 397; CHECK-SSE2-LABEL: test_srem_even_allones_ne: 398; CHECK-SSE2: # %bb.0: 399; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295,0,4294967295] 400; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3 401; CHECK-SSE2-NEXT: pand %xmm2, %xmm3 402; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 
403; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 404; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 405; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2454267027,2454267027,0,2454267027] 406; CHECK-SSE2-NEXT: pand %xmm5, %xmm4 407; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4 408; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm5 409; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3] 410; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3] 411; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027] 412; CHECK-SSE2-NEXT: pmuludq %xmm6, %xmm3 413; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] 414; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] 415; CHECK-SSE2-NEXT: psubd %xmm4, %xmm5 416; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,1,1,1] 417; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm6 418; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm6[0,2,2,3] 419; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm6 = <1,u,4294967295,u> 420; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm6 421; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3] 422; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1] 423; CHECK-SSE2-NEXT: paddd %xmm5, %xmm6 424; CHECK-SSE2-NEXT: movdqa %xmm6, %xmm4 425; CHECK-SSE2-NEXT: psrad $3, %xmm4 426; CHECK-SSE2-NEXT: movdqa %xmm6, %xmm5 427; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[2,0],xmm4[3,0] 428; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0,2] 429; CHECK-SSE2-NEXT: psrld $31, %xmm6 430; CHECK-SSE2-NEXT: pand %xmm2, %xmm6 431; CHECK-SSE2-NEXT: paddd %xmm4, %xmm6 432; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3] 433; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm6 434; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm6[0,2,2,3] 435; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2 436; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 437; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 438; CHECK-SSE2-NEXT: psubd %xmm4, %xmm0 439; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 440; CHECK-SSE2-NEXT: 
pandn %xmm3, %xmm0 441; CHECK-SSE2-NEXT: retq 442; 443; CHECK-SSE41-LABEL: test_srem_even_allones_ne: 444; CHECK-SSE41: # %bb.0: 445; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 446; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm1 447; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,0,0,0] 448; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2 449; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 450; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 451; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,4294967295,1] 452; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm1 453; CHECK-SSE41-NEXT: paddd %xmm2, %xmm1 454; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2 455; CHECK-SSE41-NEXT: psrad $3, %xmm2 456; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7] 457; CHECK-SSE41-NEXT: psrld $31, %xmm1 458; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3 459; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7] 460; CHECK-SSE41-NEXT: paddd %xmm2, %xmm1 461; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1 462; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0 463; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0 464; CHECK-SSE41-NEXT: pandn {{.*}}(%rip), %xmm0 465; CHECK-SSE41-NEXT: retq 466; 467; CHECK-AVX1-LABEL: test_srem_even_allones_ne: 468; CHECK-AVX1: # %bb.0: 469; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 470; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm1, %xmm1 471; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2 472; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 473; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 474; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 475; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 476; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm2 477; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7] 478; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 479; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 480; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 
= xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7] 481; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 482; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 483; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 484; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 485; CHECK-AVX1-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 486; CHECK-AVX1-NEXT: retq 487; 488; CHECK-AVX2-LABEL: test_srem_even_allones_ne: 489; CHECK-AVX2: # %bb.0: 490; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 491; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027] 492; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1 493; CHECK-AVX2-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2 494; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 495; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] 496; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 497; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 498; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2 499; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 500; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3] 501; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 502; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 503; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 504; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 505; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 506; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] 507; CHECK-AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0 508; CHECK-AVX2-NEXT: retq 509; 510; CHECK-AVX512VL-LABEL: test_srem_even_allones_ne: 511; CHECK-AVX512VL: # %bb.0: 512; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0 513; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 514; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0 515; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 516; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 517; CHECK-AVX512VL-NEXT: vpandnd {{.*}}(%rip){1to4}, %xmm0, %xmm0 518; CHECK-AVX512VL-NEXT: retq 519 %srem = srem <4 x i32> %X, 
<i32 14, i32 14, i32 4294967295, i32 14> 520 %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 521 %ret = zext <4 x i1> %cmp to <4 x i32> 522 ret <4 x i32> %ret 523} 524 525; One all-ones divisor in odd+even divisor 526define <4 x i32> @test_srem_odd_even_allones_eq(<4 x i32> %X) nounwind { 527; CHECK-SSE2-LABEL: test_srem_odd_even_allones_eq: 528; CHECK-SSE2: # %bb.0: 529; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4294967295,0] 530; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 531; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 532; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3] 533; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 534; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] 535; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1 536; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 537; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] 538; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,2454267027,0,1374389535] 539; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 540; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1 541; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3] 542; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3] 543; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1 544; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 545; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 546; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 547; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 548; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 549; CHECK-SSE2-NEXT: pand %xmm5, %xmm4 550; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,4294967295,0,0] 551; CHECK-SSE2-NEXT: pand %xmm0, %xmm5 552; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5 553; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2 554; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2 555; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 556; CHECK-SSE2-NEXT: psrad $5, %xmm3 557; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4 558; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1] 559; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 560; CHECK-SSE2-NEXT: 
psrad $3, %xmm3 561; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm5 562; CHECK-SSE2-NEXT: psrad $1, %xmm5 563; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm3[0] 564; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3] 565; CHECK-SSE2-NEXT: psrld $31, %xmm2 566; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2 567; CHECK-SSE2-NEXT: paddd %xmm5, %xmm2 568; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,14,4294967295,100] 569; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 570; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2 571; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 572; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 573; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3 574; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 575; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 576; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 577; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 578; CHECK-SSE2-NEXT: psrld $31, %xmm0 579; CHECK-SSE2-NEXT: retq 580; 581; CHECK-SSE41-LABEL: test_srem_odd_even_allones_eq: 582; CHECK-SSE41: # %bb.0: 583; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535] 584; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 585; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 586; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3 587; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1 588; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 589; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 590; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4294967295,0] 591; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2 592; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 593; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1 594; CHECK-SSE41-NEXT: psrad $5, %xmm1 595; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3 596; CHECK-SSE41-NEXT: psrad $3, %xmm3 597; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7] 598; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1 599; CHECK-SSE41-NEXT: psrad $1, %xmm1 600; CHECK-SSE41-NEXT: pblendw 
{{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] 601; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 602; CHECK-SSE41-NEXT: psrld $31, %xmm2 603; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3 604; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5],xmm2[6,7] 605; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 606; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 607; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 608; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0 609; CHECK-SSE41-NEXT: psrld $31, %xmm0 610; CHECK-SSE41-NEXT: retq 611; 612; CHECK-AVX1-LABEL: test_srem_odd_even_allones_eq: 613; CHECK-AVX1: # %bb.0: 614; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535] 615; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 616; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 617; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 618; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 619; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 620; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 621; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 622; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 623; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm2 624; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm3 625; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 626; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm3 627; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7] 628; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7] 629; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 630; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 631; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7] 632; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 633; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 634; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 635; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 636; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 637; 
CHECK-AVX1-NEXT: retq 638; 639; CHECK-AVX2-LABEL: test_srem_odd_even_allones_eq: 640; CHECK-AVX2: # %bb.0: 641; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535] 642; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 643; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 644; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 645; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 646; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 647; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 648; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 649; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 650; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2 651; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 652; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3] 653; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 654; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 655; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 656; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 657; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 658; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 659; CHECK-AVX2-NEXT: retq 660; 661; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_eq: 662; CHECK-AVX512VL: # %bb.0: 663; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 664; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 665; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 666; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 667; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 668; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 669; CHECK-AVX512VL-NEXT: retq 670 %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100> 671 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 672 %ret = zext <4 x i1> %cmp to <4 x i32> 673 ret <4 x i32> %ret 674} 675define <4 x i32> @test_srem_odd_even_allones_ne(<4 x i32> %X) nounwind { 676; CHECK-SSE2-LABEL: test_srem_odd_even_allones_ne: 677; CHECK-SSE2: # %bb.0: 
678; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4294967295,0] 679; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 680; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 681; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3] 682; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 683; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] 684; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1 685; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 686; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] 687; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,2454267027,0,1374389535] 688; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 689; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1 690; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3] 691; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3] 692; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1 693; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 694; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 695; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 696; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 697; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 698; CHECK-SSE2-NEXT: pand %xmm5, %xmm4 699; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,4294967295,0,0] 700; CHECK-SSE2-NEXT: pand %xmm0, %xmm5 701; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5 702; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2 703; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2 704; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 705; CHECK-SSE2-NEXT: psrad $5, %xmm3 706; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4 707; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1] 708; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 709; CHECK-SSE2-NEXT: psrad $3, %xmm3 710; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm5 711; CHECK-SSE2-NEXT: psrad $1, %xmm5 712; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm3[0] 713; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3] 714; CHECK-SSE2-NEXT: psrld $31, %xmm2 715; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2 716; CHECK-SSE2-NEXT: paddd %xmm5, %xmm2 717; CHECK-SSE2-NEXT: movdqa 
{{.*#+}} xmm3 = [5,14,4294967295,100] 718; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 719; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2 720; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 721; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 722; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3 723; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 724; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 725; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 726; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 727; CHECK-SSE2-NEXT: pandn {{.*}}(%rip), %xmm0 728; CHECK-SSE2-NEXT: retq 729; 730; CHECK-SSE41-LABEL: test_srem_odd_even_allones_ne: 731; CHECK-SSE41: # %bb.0: 732; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535] 733; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 734; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 735; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3 736; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1 737; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 738; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 739; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4294967295,0] 740; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2 741; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 742; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1 743; CHECK-SSE41-NEXT: psrad $5, %xmm1 744; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3 745; CHECK-SSE41-NEXT: psrad $3, %xmm3 746; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7] 747; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1 748; CHECK-SSE41-NEXT: psrad $1, %xmm1 749; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] 750; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 751; CHECK-SSE41-NEXT: psrld $31, %xmm2 752; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3 753; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5],xmm2[6,7] 754; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 755; CHECK-SSE41-NEXT: pmulld 
{{.*}}(%rip), %xmm2 756; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 757; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0 758; CHECK-SSE41-NEXT: pandn {{.*}}(%rip), %xmm0 759; CHECK-SSE41-NEXT: retq 760; 761; CHECK-AVX1-LABEL: test_srem_odd_even_allones_ne: 762; CHECK-AVX1: # %bb.0: 763; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535] 764; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 765; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 766; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 767; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 768; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 769; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 770; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 771; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 772; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm2 773; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm3 774; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 775; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm3 776; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7] 777; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7] 778; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 779; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 780; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7] 781; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 782; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 783; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 784; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 785; CHECK-AVX1-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 786; CHECK-AVX1-NEXT: retq 787; 788; CHECK-AVX2-LABEL: test_srem_odd_even_allones_ne: 789; CHECK-AVX2: # %bb.0: 790; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535] 791; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 792; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 793; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, 
%xmm2 794; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 795; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 796; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 797; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 798; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 799; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2 800; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 801; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3] 802; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 803; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 804; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 805; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 806; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 807; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] 808; CHECK-AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0 809; CHECK-AVX2-NEXT: retq 810; 811; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_ne: 812; CHECK-AVX512VL: # %bb.0: 813; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 814; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 815; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 816; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 817; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 818; CHECK-AVX512VL-NEXT: vpandnd {{.*}}(%rip){1to4}, %xmm0, %xmm0 819; CHECK-AVX512VL-NEXT: retq 820 %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100> 821 %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 822 %ret = zext <4 x i1> %cmp to <4 x i32> 823 ret <4 x i32> %ret 824} 825 826;------------------------------------------------------------------------------; 827 828; One power-of-two divisor in odd divisor 829define <4 x i32> @test_srem_odd_poweroftwo(<4 x i32> %X) nounwind { 830; CHECK-SSE2-LABEL: test_srem_odd_poweroftwo: 831; CHECK-SSE2: # %bb.0: 832; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 833; CHECK-SSE2-NEXT: pxor %xmm2, %xmm2 834; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 835; CHECK-SSE2-NEXT: 
movdqa {{.*#+}} xmm3 = [1717986919,1717986919,2147483649,1717986919] 836; CHECK-SSE2-NEXT: pand %xmm3, %xmm2 837; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [0,0,4294967295,0] 838; CHECK-SSE2-NEXT: pand %xmm0, %xmm4 839; CHECK-SSE2-NEXT: paddd %xmm4, %xmm2 840; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm3 841; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] 842; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3] 843; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm5 844; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3] 845; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1] 846; CHECK-SSE2-NEXT: psubd %xmm2, %xmm3 847; CHECK-SSE2-NEXT: paddd %xmm4, %xmm3 848; CHECK-SSE2-NEXT: movdqa %xmm3, %xmm2 849; CHECK-SSE2-NEXT: psrad $1, %xmm2 850; CHECK-SSE2-NEXT: movdqa %xmm3, %xmm4 851; CHECK-SSE2-NEXT: psrad $3, %xmm4 852; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[2,0],xmm2[3,0] 853; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0,2] 854; CHECK-SSE2-NEXT: psrld $31, %xmm3 855; CHECK-SSE2-NEXT: paddd %xmm2, %xmm3 856; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 857; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm3 858; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 859; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2 860; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 861; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] 862; CHECK-SSE2-NEXT: psubd %xmm3, %xmm0 863; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 864; CHECK-SSE2-NEXT: psrld $31, %xmm0 865; CHECK-SSE2-NEXT: retq 866; 867; CHECK-SSE41-LABEL: test_srem_odd_poweroftwo: 868; CHECK-SSE41: # %bb.0: 869; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 870; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 871; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7] 872; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 873; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm3 874; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm4 = <1717986919,u,2147483649,u> 875; 
CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm4 876; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 877; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7] 878; CHECK-SSE41-NEXT: paddd %xmm2, %xmm4 879; CHECK-SSE41-NEXT: movdqa %xmm4, %xmm2 880; CHECK-SSE41-NEXT: psrad $3, %xmm2 881; CHECK-SSE41-NEXT: movdqa %xmm4, %xmm3 882; CHECK-SSE41-NEXT: psrad $1, %xmm3 883; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7] 884; CHECK-SSE41-NEXT: psrld $31, %xmm4 885; CHECK-SSE41-NEXT: paddd %xmm3, %xmm4 886; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm4 887; CHECK-SSE41-NEXT: psubd %xmm4, %xmm0 888; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 889; CHECK-SSE41-NEXT: psrld $31, %xmm0 890; CHECK-SSE41-NEXT: retq 891; 892; CHECK-AVX1-LABEL: test_srem_odd_poweroftwo: 893; CHECK-AVX1: # %bb.0: 894; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 895; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 896; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 897; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm3, %xmm3 898; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm4 899; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 900; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7] 901; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm3, %xmm2 902; CHECK-AVX1-NEXT: vpsrad $3, %xmm2, %xmm3 903; CHECK-AVX1-NEXT: vpsrad $1, %xmm2, %xmm4 904; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5],xmm4[6,7] 905; CHECK-AVX1-NEXT: vpsrld $31, %xmm2, %xmm2 906; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm3, %xmm2 907; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm2, %xmm2 908; CHECK-AVX1-NEXT: vpsubd %xmm2, %xmm0, %xmm0 909; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 910; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 911; CHECK-AVX1-NEXT: retq 912; 913; CHECK-AVX2-LABEL: test_srem_odd_poweroftwo: 914; CHECK-AVX2: # %bb.0: 915; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 916; CHECK-AVX2-NEXT: vpblendd 
{{.*#+}} xmm2 = xmm1[0,1],xmm0[2],xmm1[3] 917; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 918; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [1717986919,1717986919,1717986919,1717986919] 919; CHECK-AVX2-NEXT: vpmuldq %xmm4, %xmm3, %xmm3 920; CHECK-AVX2-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm4 921; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 922; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm4[0],xmm3[1],xmm4[2],xmm3[3] 923; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm3, %xmm2 924; CHECK-AVX2-NEXT: vpsrld $31, %xmm2, %xmm3 925; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm2, %xmm2 926; CHECK-AVX2-NEXT: vpaddd %xmm3, %xmm2, %xmm2 927; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm2, %xmm2 928; CHECK-AVX2-NEXT: vpsubd %xmm2, %xmm0, %xmm0 929; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 930; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 931; CHECK-AVX2-NEXT: retq 932; 933; CHECK-AVX512VL-LABEL: test_srem_odd_poweroftwo: 934; CHECK-AVX512VL: # %bb.0: 935; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 936; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 937; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 938; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 939; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 940; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 941; CHECK-AVX512VL-NEXT: retq 942 %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 16, i32 5> 943 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 944 %ret = zext <4 x i1> %cmp to <4 x i32> 945 ret <4 x i32> %ret 946} 947 948; One power-of-two divisor in even divisor 949define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind { 950; CHECK-SSE2-LABEL: test_srem_even_poweroftwo: 951; CHECK-SSE2: # %bb.0: 952; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 953; CHECK-SSE2-NEXT: pxor %xmm2, %xmm2 954; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 955; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,2454267027,2147483649,2454267027] 956; CHECK-SSE2-NEXT: pand %xmm3, %xmm2 957; 
CHECK-SSE2-NEXT: paddd %xmm0, %xmm2 958; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm3 959; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] 960; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] 961; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm4 962; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3] 963; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 964; CHECK-SSE2-NEXT: psubd %xmm2, %xmm3 965; CHECK-SSE2-NEXT: paddd %xmm0, %xmm3 966; CHECK-SSE2-NEXT: movdqa %xmm3, %xmm2 967; CHECK-SSE2-NEXT: psrld $31, %xmm2 968; CHECK-SSE2-NEXT: psrad $3, %xmm3 969; CHECK-SSE2-NEXT: paddd %xmm2, %xmm3 970; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 971; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm3 972; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 973; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2 974; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 975; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] 976; CHECK-SSE2-NEXT: psubd %xmm3, %xmm0 977; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 978; CHECK-SSE2-NEXT: psrld $31, %xmm0 979; CHECK-SSE2-NEXT: retq 980; 981; CHECK-SSE41-LABEL: test_srem_even_poweroftwo: 982; CHECK-SSE41: # %bb.0: 983; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 984; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm1 985; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = <2454267027,u,2147483649,u> 986; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2 987; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 988; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 989; CHECK-SSE41-NEXT: paddd %xmm0, %xmm2 990; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1 991; CHECK-SSE41-NEXT: psrld $31, %xmm1 992; CHECK-SSE41-NEXT: psrad $3, %xmm2 993; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 994; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 995; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 996; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 997; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 998; CHECK-SSE41-NEXT: 
psrld $31, %xmm0 999; CHECK-SSE41-NEXT: retq 1000; 1001; CHECK-AVX1-LABEL: test_srem_even_poweroftwo: 1002; CHECK-AVX1: # %bb.0: 1003; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1004; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm1, %xmm1 1005; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2 1006; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1007; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1008; CHECK-AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm1 1009; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm2 1010; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm1 1011; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 1012; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1013; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1014; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1015; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1016; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1017; CHECK-AVX1-NEXT: retq 1018; 1019; CHECK-AVX2-LABEL: test_srem_even_poweroftwo: 1020; CHECK-AVX2: # %bb.0: 1021; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1022; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027] 1023; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1 1024; CHECK-AVX2-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2 1025; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1026; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] 1027; CHECK-AVX2-NEXT: vpaddd %xmm0, %xmm1, %xmm1 1028; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2 1029; CHECK-AVX2-NEXT: vpsrad $3, %xmm1, %xmm1 1030; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 1031; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1032; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1033; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1034; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1035; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1036; CHECK-AVX2-NEXT: retq 1037; 1038; CHECK-AVX512VL-LABEL: test_srem_even_poweroftwo: 1039; CHECK-AVX512VL: # %bb.0: 
1040; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 1041; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 1042; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 1043; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 1044; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1045; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1046; CHECK-AVX512VL-NEXT: retq 1047 %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 16, i32 14> 1048 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1049 %ret = zext <4 x i1> %cmp to <4 x i32> 1050 ret <4 x i32> %ret 1051} 1052 1053; One power-of-two divisor in odd+even divisor 1054define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind { 1055; CHECK-SSE2-LABEL: test_srem_odd_even_poweroftwo: 1056; CHECK-SSE2: # %bb.0: 1057; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1717986919,2454267027,2147483649,1374389535] 1058; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 1059; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm1 1060; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3] 1061; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] 1062; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] 1063; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm4 1064; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,3,2,3] 1065; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1066; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1067; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 1068; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1069; CHECK-SSE2-NEXT: pand %xmm3, %xmm4 1070; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [0,4294967295,4294967295,0] 1071; CHECK-SSE2-NEXT: pand %xmm0, %xmm3 1072; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4 1073; CHECK-SSE2-NEXT: psubd %xmm4, %xmm2 1074; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2 1075; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 1076; CHECK-SSE2-NEXT: psrad $5, %xmm3 1077; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4 1078; CHECK-SSE2-NEXT: psrad $3, %xmm4 1079; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm5 1080; CHECK-SSE2-NEXT: psrad $1, 
%xmm5 1081; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0] 1082; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1] 1083; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3] 1084; CHECK-SSE2-NEXT: psrld $31, %xmm2 1085; CHECK-SSE2-NEXT: paddd %xmm5, %xmm2 1086; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,14,16,100] 1087; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 1088; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2 1089; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 1090; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1091; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3 1092; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 1093; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 1094; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 1095; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1096; CHECK-SSE2-NEXT: psrld $31, %xmm0 1097; CHECK-SSE2-NEXT: retq 1098; 1099; CHECK-SSE41-LABEL: test_srem_odd_even_poweroftwo: 1100; CHECK-SSE41: # %bb.0: 1101; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483649,1374389535] 1102; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1103; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1104; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3 1105; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1 1106; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1107; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 1108; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 1109; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm3 1110; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3,4,5],xmm2[6,7] 1111; CHECK-SSE41-NEXT: paddd %xmm1, %xmm3 1112; CHECK-SSE41-NEXT: movdqa %xmm3, %xmm1 1113; CHECK-SSE41-NEXT: psrad $5, %xmm1 1114; CHECK-SSE41-NEXT: movdqa %xmm3, %xmm4 1115; CHECK-SSE41-NEXT: psrad $3, %xmm4 1116; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm4[0,1,2,3],xmm1[4,5,6,7] 1117; CHECK-SSE41-NEXT: movdqa %xmm3, %xmm5 1118; CHECK-SSE41-NEXT: psrad $1, %xmm5 1119; 
CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm4[4,5,6,7] 1120; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1],xmm1[2,3],xmm5[4,5],xmm1[6,7] 1121; CHECK-SSE41-NEXT: psrld $31, %xmm3 1122; CHECK-SSE41-NEXT: paddd %xmm5, %xmm3 1123; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm3 1124; CHECK-SSE41-NEXT: psubd %xmm3, %xmm0 1125; CHECK-SSE41-NEXT: pcmpeqd %xmm2, %xmm0 1126; CHECK-SSE41-NEXT: psrld $31, %xmm0 1127; CHECK-SSE41-NEXT: retq 1128; 1129; CHECK-AVX1-LABEL: test_srem_odd_even_poweroftwo: 1130; CHECK-AVX1: # %bb.0: 1131; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483649,1374389535] 1132; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1133; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1134; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 1135; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 1136; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1137; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 1138; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1139; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm0[2,3,4,5],xmm2[6,7] 1140; CHECK-AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1 1141; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm3 1142; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm4 1143; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7] 1144; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm5 1145; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7] 1146; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7] 1147; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 1148; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm3, %xmm1 1149; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1150; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1151; CHECK-AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 1152; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1153; CHECK-AVX1-NEXT: retq 1154; 1155; CHECK-AVX2-LABEL: test_srem_odd_even_poweroftwo: 1156; CHECK-AVX2: # %bb.0: 1157; 
CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483649,1374389535] 1158; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1159; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1160; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 1161; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 1162; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1163; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 1164; CHECK-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 1165; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm2[0],xmm0[1,2],xmm2[3] 1166; CHECK-AVX2-NEXT: vpaddd %xmm3, %xmm1, %xmm1 1167; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm3 1168; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 1169; CHECK-AVX2-NEXT: vpaddd %xmm3, %xmm1, %xmm1 1170; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1171; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1172; CHECK-AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 1173; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1174; CHECK-AVX2-NEXT: retq 1175; 1176; CHECK-AVX512VL-LABEL: test_srem_odd_even_poweroftwo: 1177; CHECK-AVX512VL: # %bb.0: 1178; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 1179; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 1180; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 1181; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 1182; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1183; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1184; CHECK-AVX512VL-NEXT: retq 1185 %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 16, i32 100> 1186 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1187 %ret = zext <4 x i1> %cmp to <4 x i32> 1188 ret <4 x i32> %ret 1189} 1190 1191;------------------------------------------------------------------------------; 1192 1193; One one divisor in odd divisor 1194define <4 x i32> @test_srem_odd_one(<4 x i32> %X) nounwind { 1195; CHECK-SSE2-LABEL: test_srem_odd_one: 1196; CHECK-SSE2: # %bb.0: 1197; CHECK-SSE2-NEXT: 
movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837] 1198; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1199; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0 1200; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1201; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 1202; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] 1203; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1204; CHECK-SSE2-NEXT: paddd {{.*}}(%rip), %xmm0 1205; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm0 1206; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 1207; CHECK-SSE2-NEXT: pandn {{.*}}(%rip), %xmm0 1208; CHECK-SSE2-NEXT: retq 1209; 1210; CHECK-SSE41-LABEL: test_srem_odd_one: 1211; CHECK-SSE41: # %bb.0: 1212; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0 1213; CHECK-SSE41-NEXT: paddd {{.*}}(%rip), %xmm0 1214; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458] 1215; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1216; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1217; CHECK-SSE41-NEXT: psrld $31, %xmm0 1218; CHECK-SSE41-NEXT: retq 1219; 1220; CHECK-AVX1-LABEL: test_srem_odd_one: 1221; CHECK-AVX1: # %bb.0: 1222; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 1223; CHECK-AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 1224; CHECK-AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 1225; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1226; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1227; CHECK-AVX1-NEXT: retq 1228; 1229; CHECK-AVX2-LABEL: test_srem_odd_one: 1230; CHECK-AVX2: # %bb.0: 1231; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837] 1232; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 1233; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729] 1234; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 1235; CHECK-AVX2-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 1236; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1237; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1238; CHECK-AVX2-NEXT: 
retq 1239; 1240; CHECK-AVX512VL-LABEL: test_srem_odd_one: 1241; CHECK-AVX512VL: # %bb.0: 1242; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0 1243; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 1244; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 1245; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1246; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1247; CHECK-AVX512VL-NEXT: retq 1248 %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 1, i32 5> 1249 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1250 %ret = zext <4 x i1> %cmp to <4 x i32> 1251 ret <4 x i32> %ret 1252} 1253 1254; One one divisor in even divisor 1255define <4 x i32> @test_srem_even_one(<4 x i32> %X) nounwind { 1256; CHECK-SSE2-LABEL: test_srem_even_one: 1257; CHECK-SSE2: # %bb.0: 1258; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295,0,4294967295] 1259; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3 1260; CHECK-SSE2-NEXT: pand %xmm2, %xmm3 1261; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1262; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 1263; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1264; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2454267027,2454267027,0,2454267027] 1265; CHECK-SSE2-NEXT: pand %xmm5, %xmm4 1266; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4 1267; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm5 1268; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,3,2,3] 1269; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3] 1270; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm5 1271; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3] 1272; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1] 1273; CHECK-SSE2-NEXT: psubd %xmm4, %xmm3 1274; CHECK-SSE2-NEXT: paddd %xmm0, %xmm3 1275; CHECK-SSE2-NEXT: movdqa %xmm3, %xmm4 1276; CHECK-SSE2-NEXT: psrad $3, %xmm4 1277; CHECK-SSE2-NEXT: movdqa %xmm3, %xmm5 1278; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[2,0],xmm4[3,0] 1279; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0,2] 1280; CHECK-SSE2-NEXT: psrld $31, 
%xmm3 1281; CHECK-SSE2-NEXT: pand %xmm2, %xmm3 1282; CHECK-SSE2-NEXT: paddd %xmm4, %xmm3 1283; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 1284; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm3 1285; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 1286; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2 1287; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 1288; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] 1289; CHECK-SSE2-NEXT: psubd %xmm3, %xmm0 1290; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1291; CHECK-SSE2-NEXT: psrld $31, %xmm0 1292; CHECK-SSE2-NEXT: retq 1293; 1294; CHECK-SSE41-LABEL: test_srem_even_one: 1295; CHECK-SSE41: # %bb.0: 1296; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1297; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm1 1298; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,0,0,0] 1299; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2 1300; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1301; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1302; CHECK-SSE41-NEXT: paddd %xmm0, %xmm2 1303; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1 1304; CHECK-SSE41-NEXT: psrad $3, %xmm1 1305; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7] 1306; CHECK-SSE41-NEXT: psrld $31, %xmm2 1307; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3 1308; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5],xmm2[6,7] 1309; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 1310; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 1311; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 1312; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0 1313; CHECK-SSE41-NEXT: psrld $31, %xmm0 1314; CHECK-SSE41-NEXT: retq 1315; 1316; CHECK-AVX1-LABEL: test_srem_even_one: 1317; CHECK-AVX1: # %bb.0: 1318; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1319; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm1, %xmm1 1320; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2 1321; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 
1322; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1323; CHECK-AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm1 1324; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm2 1325; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7] 1326; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 1327; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 1328; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7] 1329; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 1330; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1331; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1332; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 1333; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1334; CHECK-AVX1-NEXT: retq 1335; 1336; CHECK-AVX2-LABEL: test_srem_even_one: 1337; CHECK-AVX2: # %bb.0: 1338; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1339; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027] 1340; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1 1341; CHECK-AVX2-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2 1342; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1343; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] 1344; CHECK-AVX2-NEXT: vpaddd %xmm0, %xmm1, %xmm1 1345; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2 1346; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 1347; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3] 1348; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 1349; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 1350; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1351; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1352; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 1353; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1354; CHECK-AVX2-NEXT: retq 1355; 1356; CHECK-AVX512VL-LABEL: test_srem_even_one: 1357; CHECK-AVX512VL: # %bb.0: 1358; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0 1359; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, 
%xmm0 1360; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0 1361; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 1362; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1363; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1364; CHECK-AVX512VL-NEXT: retq 1365 %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 1, i32 14> 1366 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1367 %ret = zext <4 x i1> %cmp to <4 x i32> 1368 ret <4 x i32> %ret 1369} 1370 1371; One one divisor in odd+even divisor 1372define <4 x i32> @test_srem_odd_even_one(<4 x i32> %X) nounwind { 1373; CHECK-SSE2-LABEL: test_srem_odd_even_one: 1374; CHECK-SSE2: # %bb.0: 1375; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1717986919,2454267027,0,1374389535] 1376; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 1377; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1 1378; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] 1379; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 1380; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] 1381; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm4 1382; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,3,2,3] 1383; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] 1384; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1385; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 1386; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1387; CHECK-SSE2-NEXT: pand %xmm2, %xmm4 1388; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,4294967295,0,0] 1389; CHECK-SSE2-NEXT: pand %xmm0, %xmm2 1390; CHECK-SSE2-NEXT: paddd %xmm4, %xmm2 1391; CHECK-SSE2-NEXT: psubd %xmm2, %xmm3 1392; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,4294967295,4294967295,0] 1393; CHECK-SSE2-NEXT: pand %xmm0, %xmm2 1394; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2 1395; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 1396; CHECK-SSE2-NEXT: psrad $5, %xmm3 1397; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4 1398; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1] 1399; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 1400; CHECK-SSE2-NEXT: psrad $3, %xmm3 1401; CHECK-SSE2-NEXT: 
movdqa %xmm2, %xmm5 1402; CHECK-SSE2-NEXT: psrad $1, %xmm5 1403; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm3[0] 1404; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3] 1405; CHECK-SSE2-NEXT: psrld $31, %xmm2 1406; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2 1407; CHECK-SSE2-NEXT: paddd %xmm5, %xmm2 1408; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,14,1,100] 1409; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 1410; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2 1411; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 1412; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1413; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3 1414; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 1415; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 1416; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 1417; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1418; CHECK-SSE2-NEXT: psrld $31, %xmm0 1419; CHECK-SSE2-NEXT: retq 1420; 1421; CHECK-SSE41-LABEL: test_srem_odd_even_one: 1422; CHECK-SSE41: # %bb.0: 1423; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535] 1424; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1425; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1426; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3 1427; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1 1428; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1429; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 1430; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 1431; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm3 1432; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3,4,5],xmm2[6,7] 1433; CHECK-SSE41-NEXT: paddd %xmm1, %xmm3 1434; CHECK-SSE41-NEXT: movdqa %xmm3, %xmm1 1435; CHECK-SSE41-NEXT: psrad $5, %xmm1 1436; CHECK-SSE41-NEXT: movdqa %xmm3, %xmm4 1437; CHECK-SSE41-NEXT: psrad $3, %xmm4 1438; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm1[4,5,6,7] 1439; CHECK-SSE41-NEXT: movdqa %xmm3, %xmm1 1440; CHECK-SSE41-NEXT: psrad 
$1, %xmm1 1441; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7] 1442; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7] 1443; CHECK-SSE41-NEXT: psrld $31, %xmm3 1444; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7] 1445; CHECK-SSE41-NEXT: paddd %xmm1, %xmm3 1446; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm3 1447; CHECK-SSE41-NEXT: psubd %xmm3, %xmm0 1448; CHECK-SSE41-NEXT: pcmpeqd %xmm2, %xmm0 1449; CHECK-SSE41-NEXT: psrld $31, %xmm0 1450; CHECK-SSE41-NEXT: retq 1451; 1452; CHECK-AVX1-LABEL: test_srem_odd_even_one: 1453; CHECK-AVX1: # %bb.0: 1454; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535] 1455; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1456; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1457; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 1458; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 1459; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1460; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 1461; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1462; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm0[2,3,4,5],xmm2[6,7] 1463; CHECK-AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1 1464; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm3 1465; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm4 1466; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7] 1467; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm4 1468; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm1[4,5,6,7] 1469; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7] 1470; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 1471; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7] 1472; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm3, %xmm1 1473; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1474; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1475; CHECK-AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 1476; 
CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1477; CHECK-AVX1-NEXT: retq 1478; 1479; CHECK-AVX2-LABEL: test_srem_odd_even_one: 1480; CHECK-AVX2: # %bb.0: 1481; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535] 1482; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1483; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1484; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 1485; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 1486; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1487; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 1488; CHECK-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 1489; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm2[0],xmm0[1,2],xmm2[3] 1490; CHECK-AVX2-NEXT: vpaddd %xmm3, %xmm1, %xmm1 1491; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm3 1492; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm3[0,1],xmm2[2],xmm3[3] 1493; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 1494; CHECK-AVX2-NEXT: vpaddd %xmm3, %xmm1, %xmm1 1495; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1496; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1497; CHECK-AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 1498; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1499; CHECK-AVX2-NEXT: retq 1500; 1501; CHECK-AVX512VL-LABEL: test_srem_odd_even_one: 1502; CHECK-AVX512VL: # %bb.0: 1503; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 1504; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 1505; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 1506; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 1507; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1508; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1509; CHECK-AVX512VL-NEXT: retq 1510 %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 1, i32 100> 1511 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1512 %ret = zext <4 x i1> %cmp to <4 x i32> 1513 ret <4 x i32> %ret 1514} 1515 
1516;------------------------------------------------------------------------------; 1517 1518; One INT_MIN divisor in odd divisor 1519define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind { 1520; CHECK-SSE2-LABEL: test_srem_odd_INT_MIN: 1521; CHECK-SSE2: # %bb.0: 1522; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1523; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647] 1524; CHECK-SSE2-NEXT: pand %xmm0, %xmm2 1525; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm2 1526; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1527; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm0 1528; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3] 1529; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm1 1530; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] 1531; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 1532; CHECK-SSE2-NEXT: paddd {{.*}}(%rip), %xmm3 1533; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm3 1534; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm3 1535; CHECK-SSE2-NEXT: pcmpeqd %xmm0, %xmm0 1536; CHECK-SSE2-NEXT: pxor %xmm3, %xmm0 1537; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm0[3,0] 1538; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0,2] 1539; CHECK-SSE2-NEXT: psrld $31, %xmm0 1540; CHECK-SSE2-NEXT: retq 1541; 1542; CHECK-SSE41-LABEL: test_srem_odd_INT_MIN: 1543; CHECK-SSE41: # %bb.0: 1544; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 1545; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647] 1546; CHECK-SSE41-NEXT: pand %xmm0, %xmm2 1547; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm2 1548; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0 1549; CHECK-SSE41-NEXT: paddd {{.*}}(%rip), %xmm0 1550; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,0,858993458] 1551; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1552; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1553; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7] 1554; CHECK-SSE41-NEXT: psrld $31, %xmm0 1555; 
CHECK-SSE41-NEXT: retq 1556; 1557; CHECK-AVX1-LABEL: test_srem_odd_INT_MIN: 1558; CHECK-AVX1: # %bb.0: 1559; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1560; CHECK-AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm2 1561; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1 1562; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 1563; CHECK-AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 1564; CHECK-AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm2 1565; CHECK-AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 1566; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] 1567; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1568; CHECK-AVX1-NEXT: retq 1569; 1570; CHECK-AVX2-LABEL: test_srem_odd_INT_MIN: 1571; CHECK-AVX2: # %bb.0: 1572; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1573; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647] 1574; CHECK-AVX2-NEXT: vpand %xmm2, %xmm0, %xmm2 1575; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1 1576; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 1577; CHECK-AVX2-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 1578; CHECK-AVX2-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm2 1579; CHECK-AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 1580; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] 1581; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1582; CHECK-AVX2-NEXT: retq 1583; 1584; CHECK-AVX512VL-LABEL: test_srem_odd_INT_MIN: 1585; CHECK-AVX512VL: # %bb.0: 1586; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 1587; CHECK-AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm2 1588; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1 1589; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 1590; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 1591; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm2 1592; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 1593; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] 1594; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1595; 
CHECK-AVX512VL-NEXT: retq 1596 %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 2147483648, i32 5> 1597 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1598 %ret = zext <4 x i1> %cmp to <4 x i32> 1599 ret <4 x i32> %ret 1600} 1601 1602; One INT_MIN divisor in even divisor 1603define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind { 1604; CHECK-SSE2-LABEL: test_srem_even_INT_MIN: 1605; CHECK-SSE2: # %bb.0: 1606; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1607; CHECK-SSE2-NEXT: pxor %xmm2, %xmm2 1608; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm2 1609; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,2454267027,2147483647,2454267027] 1610; CHECK-SSE2-NEXT: pand %xmm3, %xmm2 1611; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4294967295,4294967295,0,4294967295] 1612; CHECK-SSE2-NEXT: pand %xmm0, %xmm4 1613; CHECK-SSE2-NEXT: paddd %xmm2, %xmm4 1614; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm3 1615; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3] 1616; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1617; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2454267027,2454267027,2454267027,2454267027] 1618; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm5 1619; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3] 1620; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1621; CHECK-SSE2-NEXT: psubd %xmm4, %xmm2 1622; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = <1,u,4294967295,u> 1623; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm4 1624; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] 1625; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm3 1626; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 1627; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] 1628; CHECK-SSE2-NEXT: paddd %xmm2, %xmm4 1629; CHECK-SSE2-NEXT: movdqa %xmm4, %xmm2 1630; CHECK-SSE2-NEXT: psrad $3, %xmm2 1631; CHECK-SSE2-NEXT: movdqa %xmm4, %xmm3 1632; CHECK-SSE2-NEXT: psrad $30, %xmm3 1633; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm2[3,0] 1634; CHECK-SSE2-NEXT: shufps 
{{.*#+}} xmm2 = xmm2[0,1],xmm3[0,2] 1635; CHECK-SSE2-NEXT: psrld $31, %xmm4 1636; CHECK-SSE2-NEXT: paddd %xmm2, %xmm4 1637; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3] 1638; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm4 1639; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3] 1640; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2 1641; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 1642; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] 1643; CHECK-SSE2-NEXT: psubd %xmm3, %xmm0 1644; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1645; CHECK-SSE2-NEXT: psrld $31, %xmm0 1646; CHECK-SSE2-NEXT: retq 1647; 1648; CHECK-SSE41-LABEL: test_srem_even_INT_MIN: 1649; CHECK-SSE41: # %bb.0: 1650; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1651; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm1 1652; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = <2454267027,u,2147483647,u> 1653; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2 1654; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1655; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1656; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,4294967295,1] 1657; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm1 1658; CHECK-SSE41-NEXT: paddd %xmm2, %xmm1 1659; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2 1660; CHECK-SSE41-NEXT: psrad $30, %xmm2 1661; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm3 1662; CHECK-SSE41-NEXT: psrad $3, %xmm3 1663; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7] 1664; CHECK-SSE41-NEXT: psrld $31, %xmm1 1665; CHECK-SSE41-NEXT: paddd %xmm3, %xmm1 1666; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1 1667; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0 1668; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 1669; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1670; CHECK-SSE41-NEXT: psrld $31, %xmm0 1671; CHECK-SSE41-NEXT: retq 1672; 1673; CHECK-AVX1-LABEL: test_srem_even_INT_MIN: 1674; CHECK-AVX1: # %bb.0: 1675; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1676; 
CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm1, %xmm1 1677; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2 1678; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1679; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1680; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 1681; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 1682; CHECK-AVX1-NEXT: vpsrad $30, %xmm1, %xmm2 1683; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm3 1684; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7] 1685; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 1686; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 1687; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1688; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1689; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1690; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1691; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1692; CHECK-AVX1-NEXT: retq 1693; 1694; CHECK-AVX2-LABEL: test_srem_even_INT_MIN: 1695; CHECK-AVX2: # %bb.0: 1696; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1697; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027] 1698; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1 1699; CHECK-AVX2-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2 1700; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1701; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] 1702; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 1703; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 1704; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2 1705; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 1706; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 1707; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1708; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1709; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1710; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1711; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1712; CHECK-AVX2-NEXT: retq 1713; 1714; CHECK-AVX512VL-LABEL: 
test_srem_even_INT_MIN: 1715; CHECK-AVX512VL: # %bb.0: 1716; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 1717; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 1718; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [306783378,306783378,0,306783378] 1719; CHECK-AVX512VL-NEXT: vpaddd %xmm3, %xmm2, %xmm2 1720; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm2, %xmm2 1721; CHECK-AVX512VL-NEXT: vpminud %xmm3, %xmm2, %xmm3 1722; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 1723; CHECK-AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 1724; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1725; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3] 1726; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1727; CHECK-AVX512VL-NEXT: retq 1728 %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 2147483648, i32 14> 1729 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1730 %ret = zext <4 x i1> %cmp to <4 x i32> 1731 ret <4 x i32> %ret 1732} 1733 1734; One INT_MIN divisor in odd+even divisor 1735define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind { 1736; CHECK-SSE2-LABEL: test_srem_odd_even_INT_MIN: 1737; CHECK-SSE2: # %bb.0: 1738; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4294967295,0] 1739; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 1740; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 1741; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3] 1742; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1743; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] 1744; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1 1745; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1746; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] 1747; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,2454267027,2147483647,1374389535] 1748; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 1749; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1 1750; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3] 1751; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3] 
1752; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1 1753; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 1754; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1755; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1756; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 1757; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1758; CHECK-SSE2-NEXT: pand %xmm5, %xmm4 1759; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,4294967295,0,0] 1760; CHECK-SSE2-NEXT: pand %xmm0, %xmm5 1761; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5 1762; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2 1763; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2 1764; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 1765; CHECK-SSE2-NEXT: psrad $5, %xmm3 1766; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4 1767; CHECK-SSE2-NEXT: psrad $30, %xmm4 1768; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1] 1769; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 1770; CHECK-SSE2-NEXT: psrad $3, %xmm3 1771; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm5 1772; CHECK-SSE2-NEXT: psrad $1, %xmm5 1773; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm3[0] 1774; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3] 1775; CHECK-SSE2-NEXT: psrld $31, %xmm2 1776; CHECK-SSE2-NEXT: paddd %xmm5, %xmm2 1777; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,14,2147483648,100] 1778; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 1779; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2 1780; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 1781; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1782; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3 1783; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 1784; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 1785; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 1786; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1787; CHECK-SSE2-NEXT: psrld $31, %xmm0 1788; CHECK-SSE2-NEXT: retq 1789; 1790; CHECK-SSE41-LABEL: test_srem_odd_even_INT_MIN: 1791; CHECK-SSE41: # %bb.0: 1792; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = 
[1717986919,2454267027,2147483647,1374389535] 1793; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1794; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1795; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3 1796; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1 1797; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1798; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 1799; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4294967295,0] 1800; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2 1801; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 1802; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1 1803; CHECK-SSE41-NEXT: psrad $5, %xmm1 1804; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3 1805; CHECK-SSE41-NEXT: psrad $3, %xmm3 1806; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7] 1807; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1 1808; CHECK-SSE41-NEXT: psrad $30, %xmm1 1809; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm4 1810; CHECK-SSE41-NEXT: psrad $1, %xmm4 1811; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm1[4,5,6,7] 1812; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7] 1813; CHECK-SSE41-NEXT: psrld $31, %xmm2 1814; CHECK-SSE41-NEXT: paddd %xmm4, %xmm2 1815; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 1816; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 1817; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 1818; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1819; CHECK-SSE41-NEXT: psrld $31, %xmm0 1820; CHECK-SSE41-NEXT: retq 1821; 1822; CHECK-AVX1-LABEL: test_srem_odd_even_INT_MIN: 1823; CHECK-AVX1: # %bb.0: 1824; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483647,1374389535] 1825; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1826; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1827; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 1828; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 1829; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1830; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = 
xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 1831; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 1832; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 1833; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm2 1834; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm3 1835; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 1836; CHECK-AVX1-NEXT: vpsrad $30, %xmm1, %xmm3 1837; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm4 1838; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7] 1839; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7] 1840; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 1841; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 1842; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1843; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1844; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1845; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1846; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1847; CHECK-AVX1-NEXT: retq 1848; 1849; CHECK-AVX2-LABEL: test_srem_odd_even_INT_MIN: 1850; CHECK-AVX2: # %bb.0: 1851; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483647,1374389535] 1852; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1853; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1854; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 1855; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 1856; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1857; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 1858; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 1859; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 1860; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2 1861; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 1862; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 1863; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1864; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1865; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1866; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1867; CHECK-AVX2-NEXT: 
vpsrld $31, %xmm0, %xmm0 1868; CHECK-AVX2-NEXT: retq 1869; 1870; CHECK-AVX512VL-LABEL: test_srem_odd_even_INT_MIN: 1871; CHECK-AVX512VL: # %bb.0: 1872; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 1873; CHECK-AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm2 1874; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1 1875; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 1876; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 1877; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 1878; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm2 1879; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 1880; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3] 1881; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1882; CHECK-AVX512VL-NEXT: retq 1883 %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 2147483648, i32 100> 1884 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 1885 %ret = zext <4 x i1> %cmp to <4 x i32> 1886 ret <4 x i32> %ret 1887} 1888 1889;==============================================================================; 1890 1891; One all-ones divisor and one power-of-two divisor in odd divisor 1892define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind { 1893; CHECK-SSE2-LABEL: test_srem_odd_allones_and_poweroftwo: 1894; CHECK-SSE2: # %bb.0: 1895; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,4294967295,1,0] 1896; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 1897; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 1898; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3] 1899; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1900; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] 1901; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1 1902; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1903; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] 1904; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,0,2147483649,1717986919] 1905; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 1906; 
CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1 1907; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3] 1908; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3] 1909; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1 1910; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 1911; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1912; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1913; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 1914; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 1915; CHECK-SSE2-NEXT: pand %xmm5, %xmm4 1916; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,0,4294967295,0] 1917; CHECK-SSE2-NEXT: pand %xmm0, %xmm5 1918; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5 1919; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2 1920; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2 1921; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 1922; CHECK-SSE2-NEXT: psrad $1, %xmm3 1923; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4 1924; CHECK-SSE2-NEXT: psrad $3, %xmm4 1925; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1] 1926; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] 1927; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,3],xmm4[0,3] 1928; CHECK-SSE2-NEXT: psrld $31, %xmm2 1929; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2 1930; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2 1931; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,4294967295,16,5] 1932; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 1933; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2 1934; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 1935; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 1936; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3 1937; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 1938; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 1939; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 1940; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1941; CHECK-SSE2-NEXT: psrld $31, %xmm0 1942; CHECK-SSE2-NEXT: retq 1943; 1944; CHECK-SSE41-LABEL: test_srem_odd_allones_and_poweroftwo: 1945; CHECK-SSE41: # %bb.0: 1946; CHECK-SSE41-NEXT: movdqa {{.*#+}} 
xmm1 = [1717986919,0,2147483649,1717986919] 1947; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1948; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1949; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3 1950; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1 1951; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1952; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 1953; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,4294967295,1,0] 1954; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2 1955; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 1956; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1 1957; CHECK-SSE41-NEXT: psrad $1, %xmm1 1958; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3 1959; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7] 1960; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm4 1961; CHECK-SSE41-NEXT: psrad $3, %xmm4 1962; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm1[0,1,2,3],xmm4[4,5,6,7] 1963; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7] 1964; CHECK-SSE41-NEXT: psrld $31, %xmm2 1965; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 1966; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5,6,7] 1967; CHECK-SSE41-NEXT: paddd %xmm4, %xmm2 1968; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 1969; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 1970; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1971; CHECK-SSE41-NEXT: psrld $31, %xmm0 1972; CHECK-SSE41-NEXT: retq 1973; 1974; CHECK-AVX1-LABEL: test_srem_odd_allones_and_poweroftwo: 1975; CHECK-AVX1: # %bb.0: 1976; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,0,2147483649,1717986919] 1977; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1978; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1979; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 1980; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 1981; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1982; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 1983; CHECK-AVX1-NEXT: 
vpmulld {{.*}}(%rip), %xmm0, %xmm2 1984; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 1985; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm2 1986; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm2[4,5,6,7] 1987; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm4 1988; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm4[4,5,6,7] 1989; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] 1990; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 1991; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 1992; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5,6,7] 1993; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 1994; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1995; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1996; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 1997; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1998; CHECK-AVX1-NEXT: retq 1999; 2000; CHECK-AVX2-LABEL: test_srem_odd_allones_and_poweroftwo: 2001; CHECK-AVX2: # %bb.0: 2002; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,0,2147483649,1717986919] 2003; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2004; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2005; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 2006; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 2007; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2008; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 2009; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 2010; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 2011; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2 2012; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 2013; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2,3] 2014; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 2015; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 2016; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2017; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2018; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 2019; CHECK-AVX2-NEXT: 
vpsrld $31, %xmm0, %xmm0 2020; CHECK-AVX2-NEXT: retq 2021; 2022; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_poweroftwo: 2023; CHECK-AVX512VL: # %bb.0: 2024; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2025; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 2026; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 2027; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 2028; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2029; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 2030; CHECK-AVX512VL-NEXT: retq 2031 %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 5> 2032 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 2033 %ret = zext <4 x i1> %cmp to <4 x i32> 2034 ret <4 x i32> %ret 2035} 2036 2037; One all-ones divisor and power-of-two divisor in even divisor 2038define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind { 2039; CHECK-SSE2-LABEL: test_srem_even_allones_and_poweroftwo: 2040; CHECK-SSE2: # %bb.0: 2041; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,4294967295] 2042; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3 2043; CHECK-SSE2-NEXT: pand %xmm2, %xmm3 2044; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 2045; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 2046; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 2047; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2454267027,0,2147483649,2454267027] 2048; CHECK-SSE2-NEXT: pand %xmm5, %xmm4 2049; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4 2050; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3 2051; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm3 2052; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] 2053; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 2054; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3] 2055; CHECK-SSE2-NEXT: pmuludq %xmm6, %xmm5 2056; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3] 2057; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1] 2058; CHECK-SSE2-NEXT: psubd %xmm4, %xmm3 2059; CHECK-SSE2-NEXT: movdqa 
{{.*#+}} xmm4 = [1,4294967295,1,1] 2060; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm5 2061; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm5 2062; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3] 2063; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 2064; CHECK-SSE2-NEXT: pmuludq %xmm6, %xmm4 2065; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] 2066; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1] 2067; CHECK-SSE2-NEXT: paddd %xmm3, %xmm5 2068; CHECK-SSE2-NEXT: movdqa %xmm5, %xmm3 2069; CHECK-SSE2-NEXT: psrad $3, %xmm3 2070; CHECK-SSE2-NEXT: movdqa %xmm5, %xmm4 2071; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,0],xmm3[0,0] 2072; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[2,0],xmm3[2,3] 2073; CHECK-SSE2-NEXT: psrld $31, %xmm5 2074; CHECK-SSE2-NEXT: pand %xmm2, %xmm5 2075; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5 2076; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [14,4294967295,16,14] 2077; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3] 2078; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm5 2079; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[0,2,2,3] 2080; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 2081; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2 2082; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 2083; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 2084; CHECK-SSE2-NEXT: psubd %xmm4, %xmm0 2085; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 2086; CHECK-SSE2-NEXT: psrld $31, %xmm0 2087; CHECK-SSE2-NEXT: retq 2088; 2089; CHECK-SSE41-LABEL: test_srem_even_allones_and_poweroftwo: 2090; CHECK-SSE41: # %bb.0: 2091; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,0,2147483649,2454267027] 2092; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2093; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2094; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3 2095; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1 2096; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2097; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = 
xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 2098; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1,4294967295,1,1] 2099; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2 2100; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 2101; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1 2102; CHECK-SSE41-NEXT: psrad $3, %xmm1 2103; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5,6,7] 2104; CHECK-SSE41-NEXT: psrld $31, %xmm2 2105; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3 2106; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5,6,7] 2107; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 2108; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 2109; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 2110; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0 2111; CHECK-SSE41-NEXT: psrld $31, %xmm0 2112; CHECK-SSE41-NEXT: retq 2113; 2114; CHECK-AVX1-LABEL: test_srem_even_allones_and_poweroftwo: 2115; CHECK-AVX1: # %bb.0: 2116; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,0,2147483649,2454267027] 2117; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2118; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2119; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 2120; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 2121; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2122; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 2123; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 2124; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 2125; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm2 2126; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5,6,7] 2127; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 2128; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 2129; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5,6,7] 2130; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 2131; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2132; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2133; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 2134; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, 
%xmm0 2135; CHECK-AVX1-NEXT: retq 2136; 2137; CHECK-AVX2-LABEL: test_srem_even_allones_and_poweroftwo: 2138; CHECK-AVX2: # %bb.0: 2139; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,0,2147483649,2454267027] 2140; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2141; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2142; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 2143; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 2144; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2145; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 2146; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 2147; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 2148; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2 2149; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 2150; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2,3] 2151; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 2152; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 2153; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2154; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2155; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 2156; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 2157; CHECK-AVX2-NEXT: retq 2158; 2159; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_poweroftwo: 2160; CHECK-AVX512VL: # %bb.0: 2161; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2162; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 2163; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 2164; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 2165; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2166; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 2167; CHECK-AVX512VL-NEXT: retq 2168 %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 14> 2169 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 2170 %ret = zext <4 x i1> %cmp to <4 x i32> 2171 ret <4 x i32> %ret 2172} 2173 2174; One all-ones divisor and power-of-two divisor in odd+even divisor 
2175define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwind { 2176; CHECK-SSE2-LABEL: test_srem_odd_even_allones_and_poweroftwo: 2177; CHECK-SSE2: # %bb.0: 2178; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,4294967295,1,0] 2179; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 2180; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 2181; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3] 2182; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2183; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] 2184; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1 2185; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 2186; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] 2187; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,0,2147483649,1374389535] 2188; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 2189; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1 2190; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3] 2191; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3] 2192; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1 2193; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 2194; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 2195; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 2196; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 2197; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 2198; CHECK-SSE2-NEXT: pand %xmm5, %xmm4 2199; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,0,4294967295,0] 2200; CHECK-SSE2-NEXT: pand %xmm0, %xmm5 2201; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5 2202; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2 2203; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2 2204; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 2205; CHECK-SSE2-NEXT: psrad $5, %xmm3 2206; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4 2207; CHECK-SSE2-NEXT: psrad $3, %xmm4 2208; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1] 2209; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 2210; CHECK-SSE2-NEXT: psrad $1, %xmm3 2211; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] 2212; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = 
xmm3[0,3],xmm4[0,3] 2213; CHECK-SSE2-NEXT: psrld $31, %xmm2 2214; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2 2215; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2 2216; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,4294967295,16,100] 2217; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 2218; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2 2219; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 2220; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 2221; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3 2222; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 2223; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 2224; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 2225; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 2226; CHECK-SSE2-NEXT: psrld $31, %xmm0 2227; CHECK-SSE2-NEXT: retq 2228; 2229; CHECK-SSE41-LABEL: test_srem_odd_even_allones_and_poweroftwo: 2230; CHECK-SSE41: # %bb.0: 2231; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,0,2147483649,1374389535] 2232; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2233; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2234; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3 2235; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1 2236; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2237; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 2238; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,4294967295,1,0] 2239; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2 2240; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 2241; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1 2242; CHECK-SSE41-NEXT: psrad $5, %xmm1 2243; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] 2244; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3 2245; CHECK-SSE41-NEXT: psrad $3, %xmm3 2246; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm4 2247; CHECK-SSE41-NEXT: psrad $1, %xmm4 2248; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm3[4,5,6,7] 2249; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm1[2,3],xmm4[4,5],xmm1[6,7] 2250; CHECK-SSE41-NEXT: 
psrld $31, %xmm2 2251; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 2252; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5,6,7] 2253; CHECK-SSE41-NEXT: paddd %xmm4, %xmm2 2254; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 2255; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 2256; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 2257; CHECK-SSE41-NEXT: psrld $31, %xmm0 2258; CHECK-SSE41-NEXT: retq 2259; 2260; CHECK-AVX1-LABEL: test_srem_odd_even_allones_and_poweroftwo: 2261; CHECK-AVX1: # %bb.0: 2262; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,0,2147483649,1374389535] 2263; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2264; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2265; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 2266; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 2267; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2268; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 2269; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 2270; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 2271; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm2 2272; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm2[4,5,6,7] 2273; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm3 2274; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm4 2275; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7] 2276; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7] 2277; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 2278; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 2279; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5,6,7] 2280; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 2281; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2282; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2283; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 2284; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 2285; CHECK-AVX1-NEXT: retq 2286; 2287; CHECK-AVX2-LABEL: test_srem_odd_even_allones_and_poweroftwo: 2288; CHECK-AVX2: # 
%bb.0: 2289; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,0,2147483649,1374389535] 2290; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2291; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2292; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 2293; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 2294; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2295; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 2296; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 2297; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 2298; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2 2299; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 2300; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2,3] 2301; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 2302; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 2303; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2304; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2305; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 2306; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 2307; CHECK-AVX2-NEXT: retq 2308; 2309; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_and_poweroftwo: 2310; CHECK-AVX512VL: # %bb.0: 2311; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2312; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 2313; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 2314; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 2315; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2316; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 2317; CHECK-AVX512VL-NEXT: retq 2318 %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 100> 2319 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 2320 %ret = zext <4 x i1> %cmp to <4 x i32> 2321 ret <4 x i32> %ret 2322} 2323 2324;------------------------------------------------------------------------------; 2325 2326; One all-ones divisor and one one divisor in odd divisor 2327define <4 x i32> @test_srem_odd_allones_and_one(<4 
x i32> %X) nounwind { 2328; CHECK-SSE2-LABEL: test_srem_odd_allones_and_one: 2329; CHECK-SSE2: # %bb.0: 2330; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837] 2331; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 2332; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0 2333; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 2334; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 2335; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] 2336; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2337; CHECK-SSE2-NEXT: paddd {{.*}}(%rip), %xmm0 2338; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm0 2339; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 2340; CHECK-SSE2-NEXT: pandn {{.*}}(%rip), %xmm0 2341; CHECK-SSE2-NEXT: retq 2342; 2343; CHECK-SSE41-LABEL: test_srem_odd_allones_and_one: 2344; CHECK-SSE41: # %bb.0: 2345; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0 2346; CHECK-SSE41-NEXT: paddd {{.*}}(%rip), %xmm0 2347; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,4294967295,4294967295,858993458] 2348; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 2349; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 2350; CHECK-SSE41-NEXT: psrld $31, %xmm0 2351; CHECK-SSE41-NEXT: retq 2352; 2353; CHECK-AVX1-LABEL: test_srem_odd_allones_and_one: 2354; CHECK-AVX1: # %bb.0: 2355; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2356; CHECK-AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 2357; CHECK-AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 2358; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2359; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 2360; CHECK-AVX1-NEXT: retq 2361; 2362; CHECK-AVX2-LABEL: test_srem_odd_allones_and_one: 2363; CHECK-AVX2: # %bb.0: 2364; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837] 2365; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 2366; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729] 2367; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 2368; 
CHECK-AVX2-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 2369; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2370; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 2371; CHECK-AVX2-NEXT: retq 2372; 2373; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_one: 2374; CHECK-AVX512VL: # %bb.0: 2375; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0 2376; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 2377; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 2378; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2379; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 2380; CHECK-AVX512VL-NEXT: retq 2381 %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 5> 2382 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 2383 %ret = zext <4 x i1> %cmp to <4 x i32> 2384 ret <4 x i32> %ret 2385} 2386 2387; One all-ones divisor and one one divisor in even divisor 2388define <4 x i32> @test_srem_even_allones_and_one(<4 x i32> %X) nounwind { 2389; CHECK-SSE2-LABEL: test_srem_even_allones_and_one: 2390; CHECK-SSE2: # %bb.0: 2391; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,0,4294967295] 2392; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3 2393; CHECK-SSE2-NEXT: pand %xmm2, %xmm3 2394; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 2395; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 2396; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 2397; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2454267027,0,0,2454267027] 2398; CHECK-SSE2-NEXT: pand %xmm5, %xmm4 2399; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4 2400; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3 2401; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm3 2402; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] 2403; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,2,3,3] 2404; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3] 2405; CHECK-SSE2-NEXT: pmuludq %xmm6, %xmm5 2406; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3] 2407; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1] 2408; CHECK-SSE2-NEXT: psubd %xmm4, %xmm3 2409; 
CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,4294967295,1,1] 2410; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm5 2411; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm5 2412; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3] 2413; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] 2414; CHECK-SSE2-NEXT: pmuludq %xmm6, %xmm4 2415; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] 2416; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1] 2417; CHECK-SSE2-NEXT: paddd %xmm3, %xmm5 2418; CHECK-SSE2-NEXT: movdqa %xmm5, %xmm3 2419; CHECK-SSE2-NEXT: psrad $3, %xmm3 2420; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,3],xmm5[1,2] 2421; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2,3,1] 2422; CHECK-SSE2-NEXT: psrld $31, %xmm5 2423; CHECK-SSE2-NEXT: pand %xmm2, %xmm5 2424; CHECK-SSE2-NEXT: paddd %xmm3, %xmm5 2425; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [14,4294967295,1,14] 2426; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3] 2427; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm5 2428; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[0,2,2,3] 2429; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 2430; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2 2431; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 2432; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 2433; CHECK-SSE2-NEXT: psubd %xmm4, %xmm0 2434; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 2435; CHECK-SSE2-NEXT: psrld $31, %xmm0 2436; CHECK-SSE2-NEXT: retq 2437; 2438; CHECK-SSE41-LABEL: test_srem_even_allones_and_one: 2439; CHECK-SSE41: # %bb.0: 2440; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,0,0,2454267027] 2441; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3] 2442; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2443; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3 2444; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1 2445; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2446; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 2447; 
CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1,4294967295,1,1] 2448; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2 2449; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 2450; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1 2451; CHECK-SSE41-NEXT: psrad $3, %xmm1 2452; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5],xmm1[6,7] 2453; CHECK-SSE41-NEXT: psrld $31, %xmm2 2454; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3 2455; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3,4,5],xmm2[6,7] 2456; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 2457; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 2458; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 2459; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0 2460; CHECK-SSE41-NEXT: psrld $31, %xmm0 2461; CHECK-SSE41-NEXT: retq 2462; 2463; CHECK-AVX1-LABEL: test_srem_even_allones_and_one: 2464; CHECK-AVX1: # %bb.0: 2465; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,0,0,2454267027] 2466; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3] 2467; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2468; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 2469; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 2470; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2471; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 2472; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 2473; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 2474; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm2 2475; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3,4,5],xmm2[6,7] 2476; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 2477; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 2478; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5],xmm1[6,7] 2479; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 2480; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2481; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2482; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 2483; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 2484; CHECK-AVX1-NEXT: retq 2485; 2486; CHECK-AVX2-LABEL: 
test_srem_even_allones_and_one: 2487; CHECK-AVX2: # %bb.0: 2488; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,0,0,2454267027] 2489; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3] 2490; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2491; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 2492; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 2493; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2494; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 2495; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 2496; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 2497; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2 2498; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 2499; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3] 2500; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 2501; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 2502; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2503; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2504; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 2505; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 2506; CHECK-AVX2-NEXT: retq 2507; 2508; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_one: 2509; CHECK-AVX512VL: # %bb.0: 2510; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0 2511; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 2512; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0 2513; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 2514; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2515; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 2516; CHECK-AVX512VL-NEXT: retq 2517 %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 1, i32 14> 2518 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 2519 %ret = zext <4 x i1> %cmp to <4 x i32> 2520 ret <4 x i32> %ret 2521} 2522 2523; One all-ones divisor and one one divisor in odd+even divisor 2524define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind { 2525; CHECK-SSE2-LABEL: 
test_srem_odd_even_allones_and_one: 2526; CHECK-SSE2: # %bb.0: 2527; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,4294967295,1,0] 2528; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 2529; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 2530; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3] 2531; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2532; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] 2533; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1 2534; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 2535; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] 2536; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,0,0,1374389535] 2537; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 2538; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1 2539; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3] 2540; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[2,2,3,3] 2541; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1 2542; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 2543; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 2544; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 2545; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 2546; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 2547; CHECK-SSE2-NEXT: pand %xmm5, %xmm4 2548; CHECK-SSE2-NEXT: psubd %xmm4, %xmm2 2549; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2 2550; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 2551; CHECK-SSE2-NEXT: psrad $5, %xmm3 2552; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4 2553; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1] 2554; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 2555; CHECK-SSE2-NEXT: psrad $1, %xmm3 2556; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] 2557; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,3],xmm4[0,3] 2558; CHECK-SSE2-NEXT: psrld $31, %xmm2 2559; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2 2560; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2 2561; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,4294967295,1,100] 2562; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 2563; CHECK-SSE2-NEXT: pmuludq %xmm3, 
%xmm2 2564; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 2565; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 2566; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3 2567; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 2568; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 2569; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 2570; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 2571; CHECK-SSE2-NEXT: psrld $31, %xmm0 2572; CHECK-SSE2-NEXT: retq 2573; 2574; CHECK-SSE41-LABEL: test_srem_odd_even_allones_and_one: 2575; CHECK-SSE41: # %bb.0: 2576; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,0,0,1374389535] 2577; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3] 2578; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2579; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3 2580; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1 2581; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2582; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 2583; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,4294967295,1,0] 2584; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2 2585; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 2586; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1 2587; CHECK-SSE41-NEXT: psrad $5, %xmm1 2588; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] 2589; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3 2590; CHECK-SSE41-NEXT: psrad $1, %xmm3 2591; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7] 2592; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,3],xmm3[4,5],xmm1[6,7] 2593; CHECK-SSE41-NEXT: psrld $31, %xmm2 2594; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 2595; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3,4,5],xmm2[6,7] 2596; CHECK-SSE41-NEXT: paddd %xmm3, %xmm2 2597; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 2598; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 2599; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 2600; CHECK-SSE41-NEXT: psrld $31, %xmm0 2601; CHECK-SSE41-NEXT: retq 2602; 2603; 
CHECK-AVX1-LABEL: test_srem_odd_even_allones_and_one: 2604; CHECK-AVX1: # %bb.0: 2605; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,0,0,1374389535] 2606; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3] 2607; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2608; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 2609; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 2610; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2611; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 2612; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2 2613; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 2614; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm2 2615; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm2[4,5,6,7] 2616; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm3 2617; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7] 2618; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7] 2619; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 2620; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 2621; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5],xmm1[6,7] 2622; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 2623; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2624; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2625; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 2626; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 2627; CHECK-AVX1-NEXT: retq 2628; 2629; CHECK-AVX2-LABEL: test_srem_odd_even_allones_and_one: 2630; CHECK-AVX2: # %bb.0: 2631; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,0,0,1374389535] 2632; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3] 2633; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2634; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 2635; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 2636; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2637; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 2638; CHECK-AVX2-NEXT: vpmulld 
{{.*}}(%rip), %xmm0, %xmm2 2639; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 2640; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2 2641; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 2642; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3] 2643; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 2644; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 2645; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2646; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2647; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 2648; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 2649; CHECK-AVX2-NEXT: retq 2650; 2651; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_and_one: 2652; CHECK-AVX512VL: # %bb.0: 2653; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2654; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 2655; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 2656; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 2657; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2658; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 2659; CHECK-AVX512VL-NEXT: retq 2660 %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 100> 2661 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 2662 %ret = zext <4 x i1> %cmp to <4 x i32> 2663 ret <4 x i32> %ret 2664} 2665 2666;------------------------------------------------------------------------------; 2667 2668; One power-of-two divisor and one one divisor in odd divisor 2669define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind { 2670; CHECK-SSE2-LABEL: test_srem_odd_poweroftwo_and_one: 2671; CHECK-SSE2: # %bb.0: 2672; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1717986919,2147483649,0,1717986919] 2673; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 2674; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1 2675; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] 2676; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 2677; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] 2678; CHECK-SSE2-NEXT: 
pmuludq %xmm1, %xmm4 2679; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,3,2,3] 2680; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] 2681; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 2682; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 2683; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 2684; CHECK-SSE2-NEXT: pand %xmm2, %xmm4 2685; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,4294967295,0,0] 2686; CHECK-SSE2-NEXT: pand %xmm0, %xmm2 2687; CHECK-SSE2-NEXT: paddd %xmm4, %xmm2 2688; CHECK-SSE2-NEXT: psubd %xmm2, %xmm3 2689; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,4294967295,4294967295,0] 2690; CHECK-SSE2-NEXT: pand %xmm0, %xmm2 2691; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2 2692; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 2693; CHECK-SSE2-NEXT: psrad $1, %xmm3 2694; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4 2695; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1] 2696; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm5 2697; CHECK-SSE2-NEXT: psrad $3, %xmm5 2698; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0] 2699; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,3],xmm4[0,3] 2700; CHECK-SSE2-NEXT: psrld $31, %xmm2 2701; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2 2702; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2 2703; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,16,1,5] 2704; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 2705; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2 2706; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 2707; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 2708; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3 2709; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 2710; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 2711; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 2712; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 2713; CHECK-SSE2-NEXT: psrld $31, %xmm0 2714; CHECK-SSE2-NEXT: retq 2715; 2716; CHECK-SSE41-LABEL: test_srem_odd_poweroftwo_and_one: 2717; CHECK-SSE41: # %bb.0: 2718; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = 
[1717986919,2147483649,0,1717986919] 2719; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2720; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2721; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3 2722; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1 2723; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2724; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 2725; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 2726; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm3 2727; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3,4,5],xmm2[6,7] 2728; CHECK-SSE41-NEXT: paddd %xmm1, %xmm3 2729; CHECK-SSE41-NEXT: movdqa %xmm3, %xmm1 2730; CHECK-SSE41-NEXT: psrad $1, %xmm1 2731; CHECK-SSE41-NEXT: movdqa %xmm3, %xmm4 2732; CHECK-SSE41-NEXT: psrad $3, %xmm4 2733; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm1[4,5,6,7] 2734; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7] 2735; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7] 2736; CHECK-SSE41-NEXT: psrld $31, %xmm3 2737; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7] 2738; CHECK-SSE41-NEXT: paddd %xmm1, %xmm3 2739; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm3 2740; CHECK-SSE41-NEXT: psubd %xmm3, %xmm0 2741; CHECK-SSE41-NEXT: pcmpeqd %xmm2, %xmm0 2742; CHECK-SSE41-NEXT: psrld $31, %xmm0 2743; CHECK-SSE41-NEXT: retq 2744; 2745; CHECK-AVX1-LABEL: test_srem_odd_poweroftwo_and_one: 2746; CHECK-AVX1: # %bb.0: 2747; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2147483649,0,1717986919] 2748; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2749; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2750; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 2751; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 2752; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2753; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 2754; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 2755; 
CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm0[2,3,4,5],xmm2[6,7] 2756; CHECK-AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1 2757; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm3 2758; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm4 2759; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm3[4,5,6,7] 2760; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7] 2761; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7] 2762; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 2763; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7] 2764; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm3, %xmm1 2765; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2766; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2767; CHECK-AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 2768; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 2769; CHECK-AVX1-NEXT: retq 2770; 2771; CHECK-AVX2-LABEL: test_srem_odd_poweroftwo_and_one: 2772; CHECK-AVX2: # %bb.0: 2773; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2147483649,0,1717986919] 2774; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2775; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2776; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 2777; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 2778; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2779; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 2780; CHECK-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 2781; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm2[0],xmm0[1,2],xmm2[3] 2782; CHECK-AVX2-NEXT: vpaddd %xmm3, %xmm1, %xmm1 2783; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm3 2784; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm3[0,1],xmm2[2],xmm3[3] 2785; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 2786; CHECK-AVX2-NEXT: vpaddd %xmm3, %xmm1, %xmm1 2787; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2788; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2789; CHECK-AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 2790; 
CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 2791; CHECK-AVX2-NEXT: retq 2792; 2793; CHECK-AVX512VL-LABEL: test_srem_odd_poweroftwo_and_one: 2794; CHECK-AVX512VL: # %bb.0: 2795; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2796; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 2797; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 2798; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 2799; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2800; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 2801; CHECK-AVX512VL-NEXT: retq 2802 %srem = srem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 5> 2803 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 2804 %ret = zext <4 x i1> %cmp to <4 x i32> 2805 ret <4 x i32> %ret 2806} 2807 2808; One power-of-two divisor and one divisor equal to one, among even divisors 2809define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind { 2810; CHECK-SSE2-LABEL: test_srem_even_poweroftwo_and_one: 2811; CHECK-SSE2: # %bb.0: 2812; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295,0,4294967295] 2813; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3 2814; CHECK-SSE2-NEXT: pand %xmm2, %xmm3 2815; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 2816; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 2817; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 2818; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2454267027,2147483649,0,2454267027] 2819; CHECK-SSE2-NEXT: pand %xmm5, %xmm4 2820; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4 2821; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3 2822; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm3 2823; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] 2824; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3] 2825; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3] 2826; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm6 2827; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,3,2,3] 2828; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1] 2829; CHECK-SSE2-NEXT: psubd %xmm4, %xmm3 2830; CHECK-SSE2-NEXT: paddd %xmm0, %xmm3 
2831; CHECK-SSE2-NEXT: movdqa %xmm3, %xmm4 2832; CHECK-SSE2-NEXT: psrad $3, %xmm4 2833; CHECK-SSE2-NEXT: movdqa %xmm3, %xmm5 2834; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[2,0],xmm4[3,0] 2835; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0,2] 2836; CHECK-SSE2-NEXT: psrld $31, %xmm3 2837; CHECK-SSE2-NEXT: pand %xmm2, %xmm3 2838; CHECK-SSE2-NEXT: paddd %xmm4, %xmm3 2839; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [14,16,1,14] 2840; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 2841; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm3 2842; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 2843; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 2844; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm2 2845; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 2846; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] 2847; CHECK-SSE2-NEXT: psubd %xmm3, %xmm0 2848; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 2849; CHECK-SSE2-NEXT: psrld $31, %xmm0 2850; CHECK-SSE2-NEXT: retq 2851; 2852; CHECK-SSE41-LABEL: test_srem_even_poweroftwo_and_one: 2853; CHECK-SSE41: # %bb.0: 2854; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2147483649,0,2454267027] 2855; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2856; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2857; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3 2858; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1 2859; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2860; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 2861; CHECK-SSE41-NEXT: paddd %xmm0, %xmm1 2862; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2 2863; CHECK-SSE41-NEXT: psrad $3, %xmm2 2864; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7] 2865; CHECK-SSE41-NEXT: psrld $31, %xmm1 2866; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3 2867; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7] 2868; CHECK-SSE41-NEXT: paddd %xmm2, %xmm1 2869; CHECK-SSE41-NEXT: pmulld 
{{.*}}(%rip), %xmm1 2870; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0 2871; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0 2872; CHECK-SSE41-NEXT: psrld $31, %xmm0 2873; CHECK-SSE41-NEXT: retq 2874; 2875; CHECK-AVX1-LABEL: test_srem_even_poweroftwo_and_one: 2876; CHECK-AVX1: # %bb.0: 2877; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,2147483649,0,2454267027] 2878; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2879; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2880; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 2881; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 2882; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2883; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 2884; CHECK-AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm1 2885; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm2 2886; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7] 2887; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 2888; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 2889; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7] 2890; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 2891; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2892; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2893; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 2894; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 2895; CHECK-AVX1-NEXT: retq 2896; 2897; CHECK-AVX2-LABEL: test_srem_even_poweroftwo_and_one: 2898; CHECK-AVX2: # %bb.0: 2899; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,2147483649,0,2454267027] 2900; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2901; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2902; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 2903; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 2904; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2905; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 2906; CHECK-AVX2-NEXT: vpaddd %xmm0, %xmm1, %xmm1 2907; CHECK-AVX2-NEXT: vpsrld $31, 
%xmm1, %xmm2 2908; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 2909; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3] 2910; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 2911; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 2912; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2913; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2914; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 2915; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 2916; CHECK-AVX2-NEXT: retq 2917; 2918; CHECK-AVX512VL-LABEL: test_srem_even_poweroftwo_and_one: 2919; CHECK-AVX512VL: # %bb.0: 2920; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2921; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 2922; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 2923; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 2924; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2925; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 2926; CHECK-AVX512VL-NEXT: retq 2927 %srem = srem <4 x i32> %X, <i32 14, i32 16, i32 1, i32 14> 2928 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 2929 %ret = zext <4 x i1> %cmp to <4 x i32> 2930 ret <4 x i32> %ret 2931} 2932 2933; One power-of-two divisor and one divisor equal to one, among odd+even divisors 2934define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind { 2935; CHECK-SSE2-LABEL: test_srem_odd_even_poweroftwo_and_one: 2936; CHECK-SSE2: # %bb.0: 2937; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1717986919,2147483649,0,1374389535] 2938; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 2939; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1 2940; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] 2941; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 2942; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] 2943; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm4 2944; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,3,2,3] 2945; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] 2946; CHECK-SSE2-NEXT: pxor %xmm1, 
%xmm1 2947; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 2948; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 2949; CHECK-SSE2-NEXT: pand %xmm2, %xmm4 2950; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,4294967295,0,0] 2951; CHECK-SSE2-NEXT: pand %xmm0, %xmm2 2952; CHECK-SSE2-NEXT: paddd %xmm4, %xmm2 2953; CHECK-SSE2-NEXT: psubd %xmm2, %xmm3 2954; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,4294967295,4294967295,0] 2955; CHECK-SSE2-NEXT: pand %xmm0, %xmm2 2956; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2 2957; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 2958; CHECK-SSE2-NEXT: psrad $5, %xmm3 2959; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4 2960; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1] 2961; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 2962; CHECK-SSE2-NEXT: psrad $3, %xmm3 2963; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm5 2964; CHECK-SSE2-NEXT: psrad $1, %xmm5 2965; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm3[0] 2966; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3] 2967; CHECK-SSE2-NEXT: psrld $31, %xmm2 2968; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2 2969; CHECK-SSE2-NEXT: paddd %xmm5, %xmm2 2970; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,16,1,100] 2971; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 2972; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2 2973; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 2974; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 2975; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3 2976; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 2977; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 2978; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 2979; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 2980; CHECK-SSE2-NEXT: psrld $31, %xmm0 2981; CHECK-SSE2-NEXT: retq 2982; 2983; CHECK-SSE41-LABEL: test_srem_odd_even_poweroftwo_and_one: 2984; CHECK-SSE41: # %bb.0: 2985; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,2147483649,0,1374389535] 2986; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2987; CHECK-SSE41-NEXT: pshufd 
{{.*#+}} xmm3 = xmm0[1,1,3,3] 2988; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3 2989; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1 2990; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2991; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 2992; CHECK-SSE41-NEXT: pxor %xmm2, %xmm2 2993; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm3 2994; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3,4,5],xmm2[6,7] 2995; CHECK-SSE41-NEXT: paddd %xmm1, %xmm3 2996; CHECK-SSE41-NEXT: movdqa %xmm3, %xmm1 2997; CHECK-SSE41-NEXT: psrad $5, %xmm1 2998; CHECK-SSE41-NEXT: movdqa %xmm3, %xmm4 2999; CHECK-SSE41-NEXT: psrad $3, %xmm4 3000; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm1[4,5,6,7] 3001; CHECK-SSE41-NEXT: movdqa %xmm3, %xmm1 3002; CHECK-SSE41-NEXT: psrad $1, %xmm1 3003; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7] 3004; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7] 3005; CHECK-SSE41-NEXT: psrld $31, %xmm3 3006; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7] 3007; CHECK-SSE41-NEXT: paddd %xmm1, %xmm3 3008; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm3 3009; CHECK-SSE41-NEXT: psubd %xmm3, %xmm0 3010; CHECK-SSE41-NEXT: pcmpeqd %xmm2, %xmm0 3011; CHECK-SSE41-NEXT: psrld $31, %xmm0 3012; CHECK-SSE41-NEXT: retq 3013; 3014; CHECK-AVX1-LABEL: test_srem_odd_even_poweroftwo_and_one: 3015; CHECK-AVX1: # %bb.0: 3016; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2147483649,0,1374389535] 3017; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 3018; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 3019; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 3020; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 3021; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 3022; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 3023; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 3024; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = 
xmm2[0,1],xmm0[2,3,4,5],xmm2[6,7] 3025; CHECK-AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1 3026; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm3 3027; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm4 3028; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7] 3029; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm4 3030; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm1[4,5,6,7] 3031; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7] 3032; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 3033; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7] 3034; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm3, %xmm1 3035; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 3036; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 3037; CHECK-AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 3038; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 3039; CHECK-AVX1-NEXT: retq 3040; 3041; CHECK-AVX2-LABEL: test_srem_odd_even_poweroftwo_and_one: 3042; CHECK-AVX2: # %bb.0: 3043; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2147483649,0,1374389535] 3044; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 3045; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 3046; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2 3047; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1 3048; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 3049; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 3050; CHECK-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 3051; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm2[0],xmm0[1,2],xmm2[3] 3052; CHECK-AVX2-NEXT: vpaddd %xmm3, %xmm1, %xmm1 3053; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm3 3054; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm3[0,1],xmm2[2],xmm3[3] 3055; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 3056; CHECK-AVX2-NEXT: vpaddd %xmm3, %xmm1, %xmm1 3057; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 3058; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 3059; CHECK-AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 3060; 
CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 3061; CHECK-AVX2-NEXT: retq 3062; 3063; CHECK-AVX512VL-LABEL: test_srem_odd_even_poweroftwo_and_one: 3064; CHECK-AVX512VL: # %bb.0: 3065; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 3066; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 3067; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 3068; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 3069; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 3070; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 3071; CHECK-AVX512VL-NEXT: retq 3072 %srem = srem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 100> 3073 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 3074 %ret = zext <4 x i1> %cmp to <4 x i32> 3075 ret <4 x i32> %ret 3076} 3077 3078;------------------------------------------------------------------------------; 3079 3080define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind { 3081; CHECK-SSE2-LABEL: test_srem_odd_allones_and_poweroftwo_and_one: 3082; CHECK-SSE2: # %bb.0: 3083; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,4294967295,1,1] 3084; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 3085; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 3086; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3] 3087; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 3088; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 3089; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 3090; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] 3091; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] 3092; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 3093; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4 3094; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4 3095; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1717986919,0,2147483649,0] 3096; CHECK-SSE2-NEXT: pand %xmm2, %xmm4 3097; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,0,4294967295,0] 3098; CHECK-SSE2-NEXT: pand %xmm0, %xmm5 3099; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5 3100; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm2 
3101; CHECK-SSE2-NEXT: psrlq $32, %xmm2 3102; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2 3103; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2 3104; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 3105; CHECK-SSE2-NEXT: psrad $3, %xmm3 3106; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm2[1] 3107; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4 3108; CHECK-SSE2-NEXT: psrad $1, %xmm4 3109; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm2[0] 3110; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,3],xmm3[0,3] 3111; CHECK-SSE2-NEXT: psrld $31, %xmm2 3112; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2 3113; CHECK-SSE2-NEXT: paddd %xmm4, %xmm2 3114; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,4294967295,16,1] 3115; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 3116; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2 3117; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 3118; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] 3119; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3 3120; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 3121; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 3122; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 3123; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 3124; CHECK-SSE2-NEXT: psrld $31, %xmm0 3125; CHECK-SSE2-NEXT: retq 3126; 3127; CHECK-SSE41-LABEL: test_srem_odd_allones_and_poweroftwo_and_one: 3128; CHECK-SSE41: # %bb.0: 3129; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,4294967295,1,1] 3130; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm1 3131; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = <1717986919,u,2147483649,u> 3132; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2 3133; CHECK-SSE41-NEXT: psrlq $32, %xmm2 3134; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 3135; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1 3136; CHECK-SSE41-NEXT: psrad $3, %xmm1 3137; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3 3138; CHECK-SSE41-NEXT: psrad $1, %xmm3 3139; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7] 3140; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7] 
3141; CHECK-SSE41-NEXT: psrld $31, %xmm2 3142; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 3143; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 3144; CHECK-SSE41-NEXT: paddd %xmm3, %xmm2 3145; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 3146; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 3147; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 3148; CHECK-SSE41-NEXT: psrld $31, %xmm0 3149; CHECK-SSE41-NEXT: retq 3150; 3151; CHECK-AVX1-LABEL: test_srem_odd_allones_and_poweroftwo_and_one: 3152; CHECK-AVX1: # %bb.0: 3153; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm1 3154; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2 3155; CHECK-AVX1-NEXT: vpsrlq $32, %xmm2, %xmm2 3156; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 3157; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm2 3158; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm3 3159; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 3160; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 3161; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 3162; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 3163; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 3164; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 3165; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 3166; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 3167; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 3168; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 3169; CHECK-AVX1-NEXT: retq 3170; 3171; CHECK-AVX2-LABEL: test_srem_odd_allones_and_poweroftwo_and_one: 3172; CHECK-AVX2: # %bb.0: 3173; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm1 3174; CHECK-AVX2-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2 3175; CHECK-AVX2-NEXT: vpsrlq $32, %xmm2, %xmm2 3176; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm2, %xmm1 3177; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2 3178; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 3179; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3] 3180; CHECK-AVX2-NEXT: vpsravd 
{{.*}}(%rip), %xmm1, %xmm1 3181; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 3182; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 3183; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 3184; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 3185; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 3186; CHECK-AVX2-NEXT: retq 3187; 3188; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_poweroftwo_and_one: 3189; CHECK-AVX512VL: # %bb.0: 3190; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 3191; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 3192; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 3193; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 3194; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 3195; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 3196; CHECK-AVX512VL-NEXT: retq 3197 %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 1> 3198 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 3199 %ret = zext <4 x i1> %cmp to <4 x i32> 3200 ret <4 x i32> %ret 3201} 3202 3203define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind { 3204; CHECK-SSE2-LABEL: test_srem_even_allones_and_poweroftwo_and_one: 3205; CHECK-SSE2: # %bb.0: 3206; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,4294967295,1,1] 3207; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 3208; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 3209; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 3210; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 3211; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 3212; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3 3213; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3] 3214; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 3215; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,0,4294967295,0] 3216; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm4 3217; CHECK-SSE2-NEXT: pand %xmm3, %xmm4 3218; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 3219; CHECK-SSE2-NEXT: pxor %xmm5, %xmm5 3220; CHECK-SSE2-NEXT: 
pcmpgtd %xmm0, %xmm5 3221; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2454267027,0,2147483649,0] 3222; CHECK-SSE2-NEXT: pand %xmm6, %xmm5 3223; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5 3224; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm6 3225; CHECK-SSE2-NEXT: psrlq $32, %xmm6 3226; CHECK-SSE2-NEXT: psubd %xmm5, %xmm6 3227; CHECK-SSE2-NEXT: paddd %xmm2, %xmm6 3228; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,3,2,3] 3229; CHECK-SSE2-NEXT: movdqa %xmm6, %xmm4 3230; CHECK-SSE2-NEXT: psrad $3, %xmm4 3231; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] 3232; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 3233; CHECK-SSE2-NEXT: psrld $31, %xmm6 3234; CHECK-SSE2-NEXT: pand %xmm3, %xmm6 3235; CHECK-SSE2-NEXT: paddd %xmm4, %xmm6 3236; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [14,4294967295,16,1] 3237; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3] 3238; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm6 3239; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm6[0,2,2,3] 3240; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 3241; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2 3242; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 3243; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 3244; CHECK-SSE2-NEXT: psubd %xmm4, %xmm0 3245; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 3246; CHECK-SSE2-NEXT: psrld $31, %xmm0 3247; CHECK-SSE2-NEXT: retq 3248; 3249; CHECK-SSE41-LABEL: test_srem_even_allones_and_poweroftwo_and_one: 3250; CHECK-SSE41: # %bb.0: 3251; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,4294967295,1,1] 3252; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm1 3253; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = <2454267027,u,2147483649,u> 3254; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2 3255; CHECK-SSE41-NEXT: psrlq $32, %xmm2 3256; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 3257; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1 3258; CHECK-SSE41-NEXT: psrad $3, %xmm1 3259; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 3260; CHECK-SSE41-NEXT: 
psrld $31, %xmm2 3261; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3 3262; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] 3263; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2 3264; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 3265; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 3266; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0 3267; CHECK-SSE41-NEXT: psrld $31, %xmm0 3268; CHECK-SSE41-NEXT: retq 3269; 3270; CHECK-AVX1-LABEL: test_srem_even_allones_and_poweroftwo_and_one: 3271; CHECK-AVX1: # %bb.0: 3272; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm1 3273; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2 3274; CHECK-AVX1-NEXT: vpsrlq $32, %xmm2, %xmm2 3275; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 3276; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm2 3277; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 3278; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 3279; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 3280; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 3281; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1 3282; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 3283; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 3284; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 3285; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 3286; CHECK-AVX1-NEXT: retq 3287; 3288; CHECK-AVX2-LABEL: test_srem_even_allones_and_poweroftwo_and_one: 3289; CHECK-AVX2: # %bb.0: 3290; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm1 3291; CHECK-AVX2-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2 3292; CHECK-AVX2-NEXT: vpsrlq $32, %xmm2, %xmm2 3293; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm2, %xmm1 3294; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2 3295; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 3296; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3] 3297; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1 3298; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1 3299; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 3300; 
CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 3301; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 3302; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 3303; CHECK-AVX2-NEXT: retq 3304; 3305; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_poweroftwo_and_one: 3306; CHECK-AVX512VL: # %bb.0: 3307; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 3308; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 3309; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 3310; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 3311; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 3312; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 3313; CHECK-AVX512VL-NEXT: retq 3314 %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 1> 3315 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0> 3316 %ret = zext <4 x i1> %cmp to <4 x i32> 3317 ret <4 x i32> %ret 3318} 3319