; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefix=CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefix=CHECK-SSE41
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX512VL

; The functions below take a <4 x i32> value, compute its unsigned remainder
; by a non-splat constant divisor vector, compare that remainder against zero
; (eq or ne), and zero-extend the <4 x i1> result back to <4 x i32>. The
; expected x86 assembly is pinned once per feature level selected by the run
; lines above. The assertion lines are generated; regenerate them with the
; script named in the first line instead of editing them by hand.

; Odd+Even divisors
; Lane i of the result is 1 when X[i] urem <5, 14, 25, 100>[i] == 0, else 0.
define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,2454267027,1374389535,1374389535]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
; CHECK-SSE2-NEXT:    psrld $1, %xmm3
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[3,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
; CHECK-SSE2-NEXT:    psrld $2, %xmm3
; CHECK-SSE2-NEXT:    psrld $3, %xmm2
; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm3[0],xmm2[1]
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [5,14,25,100]
; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    psrld $5, %xmm1
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_even:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT:    psrld $1, %xmm1
; CHECK-SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [3435973837,2454267027,1374389535,1374389535]
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm2
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
; CHECK-SSE41-NEXT:    psrld $2, %xmm2
; CHECK-SSE41-NEXT:    psrld $5, %xmm3
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm2[0,1,2,3],xmm3[4,5,6,7]
; CHECK-SSE41-NEXT:    psrld $3, %xmm1
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_odd_even:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [3435973837,2454267027,1374389535,1374389535]
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpsrld $2, %xmm3, %xmm3
; CHECK-AVX1-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
; CHECK-AVX1-NEXT:    vpsrld $3, %xmm2, %xmm2
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_odd_even:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,2454267027,1374389535,1374389535]
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm4, %xmm2
; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 25, i32 100>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

;==============================================================================;

; One all-ones divisor in odd divisor
; Lane i of the result is 1 when X[i] urem <5, 5, 4294967295, 5>[i] == 0,
; else 0.
define <4 x i32> @test_urem_odd_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_allones_eq:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_allones_eq:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993459,858993459,1,858993459]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX-LABEL: test_urem_odd_allones_eq:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
; Same divisors as test_urem_odd_allones_eq, compared with `icmp ne`: lane i
; is 1 when the remainder is nonzero, else 0.
define <4 x i32> @test_urem_odd_allones_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_allones_ne:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_allones_ne:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [858993460,858993460,2,858993460]
; CHECK-SSE41-NEXT:    pmaxud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX-LABEL: test_urem_odd_allones_ne:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
  %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; One all-ones divisor in even divisor
; Lane i of the result is 1 when X[i] urem <14, 14, 4294967295, 14>[i] == 0,
; else 0.
define <4 x i32> @test_urem_even_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_allones_eq:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
; CHECK-SSE2-NEXT:    psrld $1, %xmm1
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm1
; CHECK-SSE2-NEXT:    psrld $2, %xmm1
; CHECK-SSE2-NEXT:    psrld $31, %xmm2
; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_even_allones_eq:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT:    psrld $1, %xmm1
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; CHECK-SSE41-NEXT:    psrld $31, %xmm1
; CHECK-SSE41-NEXT:    psrld $2, %xmm2
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_even_allones_eq:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm2, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm2, %xmm3
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_even_allones_eq:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
; CHECK-AVX2-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
; CHECK-AVX2-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_allones_eq:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
; Same divisors as test_urem_even_allones_eq, compared with `icmp ne`: lane i
; is 1 when the remainder is nonzero, else 0.
define <4 x i32> @test_urem_even_allones_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_allones_ne:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
; CHECK-SSE2-NEXT:    psrld $1, %xmm1
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm1
; CHECK-SSE2-NEXT:    psrld $2, %xmm1
; CHECK-SSE2-NEXT:    psrld $31, %xmm2
; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_even_allones_ne:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT:    psrld $1, %xmm1
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; CHECK-SSE41-NEXT:    psrld $31, %xmm1
; CHECK-SSE41-NEXT:    psrld $2, %xmm2
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    pandn {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_even_allones_ne:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm2, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm2, %xmm3
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_even_allones_ne:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
; CHECK-AVX2-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
; CHECK-AVX2-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; CHECK-AVX2-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_allones_ne:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
  %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; One all-ones divisor in odd+even divisor
; Lane i of the result is 1 when X[i] urem <5, 14, 4294967295, 100>[i] == 0,
; else 0.
define <4 x i32> @test_urem_odd_even_allones_eq(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even_allones_eq:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,2454267027,2147483649,1374389535]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
; CHECK-SSE2-NEXT:    psrld $1, %xmm3
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[3,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
; CHECK-SSE2-NEXT:    psrld $2, %xmm3
; CHECK-SSE2-NEXT:    psrld $31, %xmm2
; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm3[0],xmm2[1]
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [5,14,4294967295,100]
; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    psrld $5, %xmm1
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_even_allones_eq:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT:    psrld $1, %xmm1
; CHECK-SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [3435973837,2454267027,2147483649,1374389535]
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm2
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
; CHECK-SSE41-NEXT:    psrld $2, %xmm2
; CHECK-SSE41-NEXT:    psrld $5, %xmm3
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm2[0,1,2,3],xmm3[4,5,6,7]
; CHECK-SSE41-NEXT:    psrld $31, %xmm1
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_odd_even_allones_eq:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [3435973837,2454267027,2147483649,1374389535]
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpsrld $2, %xmm3, %xmm3
; CHECK-AVX1-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_odd_even_allones_eq:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,2454267027,2147483649,1374389535]
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm4, %xmm2
; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_eq:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
; Same divisors as test_urem_odd_even_allones_eq, compared with `icmp ne`:
; lane i is 1 when the remainder is nonzero, else 0.
define <4 x i32> @test_urem_odd_even_allones_ne(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even_allones_ne:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,2454267027,2147483649,1374389535]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm3
; CHECK-SSE2-NEXT:    psrld $1, %xmm3
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[3,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm3
; CHECK-SSE2-NEXT:    psrld $2, %xmm3
; CHECK-SSE2-NEXT:    psrld $31, %xmm2
; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm3[0],xmm2[1]
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm4 = [5,14,4294967295,100]
; CHECK-SSE2-NEXT:    pmuludq %xmm4, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    psrld $5, %xmm1
; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm3, %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_even_allones_ne:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT:    psrld $1, %xmm1
; CHECK-SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [3435973837,2454267027,2147483649,1374389535]
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm2
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
; CHECK-SSE41-NEXT:    psrld $2, %xmm2
; CHECK-SSE41-NEXT:    psrld $5, %xmm3
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm2[0,1,2,3],xmm3[4,5,6,7]
; CHECK-SSE41-NEXT:    psrld $31, %xmm1
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    pandn {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_odd_even_allones_ne:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [3435973837,2454267027,2147483649,1374389535]
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpsrld $2, %xmm3, %xmm3
; CHECK-AVX1-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_odd_even_allones_ne:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [3435973837,2454267027,2147483649,1374389535]
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm4, %xmm2
; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; CHECK-AVX2-NEXT:    vpandn %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_ne:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
  %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

;------------------------------------------------------------------------------;

; One power-of-two divisor in odd divisor
; Lane i of the result is 1 when X[i] urem <5, 5, 16, 5>[i] == 0, else 0.
define <4 x i32> @test_urem_odd_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_poweroftwo:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = <3435973837,u,268435456,u>
; CHECK-SSE2-NEXT:    pmuludq %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm2
; CHECK-SSE2-NEXT:    psrld $2, %xmm2
; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT:    psubd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_poweroftwo:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = <3435973837,u,268435456,u>
; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    psrld $2, %xmm1
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    psubd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_odd_poweroftwo:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_odd_poweroftwo:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmuludq {{.*}}(%rip), %xmm0, %xmm2
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_poweroftwo:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprorvd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 16, i32 5>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; One power-of-two divisor in even divisor
define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_poweroftwo:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm1
; CHECK-SSE2-NEXT:    psrld $1, %xmm1
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm1
; CHECK-SSE2-NEXT:    psrld $2, %xmm1
; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_even_poweroftwo:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT:    psrld $1, %xmm1
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; CHECK-SSE41-NEXT:    psrld $2, %xmm2
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_even_poweroftwo:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 =
xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 765; CHECK-AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm2, %xmm2 766; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 767; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 768; CHECK-AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm1, %xmm1 769; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 770; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1 771; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7] 772; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 773; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 774; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 775; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 776; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 777; CHECK-AVX1-NEXT: retq 778; 779; CHECK-AVX2-LABEL: test_urem_even_poweroftwo: 780; CHECK-AVX2: # %bb.0: 781; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm1 782; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 783; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027] 784; CHECK-AVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 785; CHECK-AVX2-NEXT: vpmuludq {{.*}}(%rip), %xmm1, %xmm1 786; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 787; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 788; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 789; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 790; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 791; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 792; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 793; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 794; CHECK-AVX2-NEXT: retq 795; 796; CHECK-AVX512VL-LABEL: test_urem_even_poweroftwo: 797; CHECK-AVX512VL: # %bb.0: 798; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 799; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 800; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 801; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 802; CHECK-AVX512VL-NEXT: 
vpsrld $31, %xmm0, %xmm0 803; CHECK-AVX512VL-NEXT: retq 804 %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 16, i32 14> 805 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 806 %ret = zext <4 x i1> %cmp to <4 x i32> 807 ret <4 x i32> %ret 808} 809 810; One power-of-two divisor in odd+even divisor 811define <4 x i32> @test_urem_odd_even_poweroftwo(<4 x i32> %X) nounwind { 812; CHECK-SSE2-LABEL: test_urem_odd_even_poweroftwo: 813; CHECK-SSE2: # %bb.0: 814; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,2454267027,268435456,1374389535] 815; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 816; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 817; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] 818; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3 819; CHECK-SSE2-NEXT: psrld $1, %xmm3 820; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[3,3] 821; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 822; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm1 823; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] 824; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 825; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 826; CHECK-SSE2-NEXT: psrld $2, %xmm3 827; CHECK-SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm3[0],xmm2[1] 828; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [5,14,16,100] 829; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm2 830; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 831; CHECK-SSE2-NEXT: psrld $5, %xmm1 832; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3] 833; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3] 834; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm1 835; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 836; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 837; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 838; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 839; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 840; CHECK-SSE2-NEXT: psrld $31, %xmm0 841; CHECK-SSE2-NEXT: retq 842; 843; CHECK-SSE41-LABEL: test_urem_odd_even_poweroftwo: 844; CHECK-SSE41: 
# %bb.0: 845; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 846; CHECK-SSE41-NEXT: psrld $1, %xmm1 847; CHECK-SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3] 848; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [3435973837,2454267027,268435456,1374389535] 849; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 850; CHECK-SSE41-NEXT: pmuludq %xmm1, %xmm3 851; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2 852; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 853; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2 854; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] 855; CHECK-SSE41-NEXT: psrld $2, %xmm2 856; CHECK-SSE41-NEXT: psrld $5, %xmm3 857; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0,1,2,3],xmm3[4,5,6,7] 858; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7] 859; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] 860; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 861; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 862; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 863; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 864; CHECK-SSE41-NEXT: psrld $31, %xmm0 865; CHECK-SSE41-NEXT: retq 866; 867; CHECK-AVX1-LABEL: test_urem_odd_even_poweroftwo: 868; CHECK-AVX1: # %bb.0: 869; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1 870; CHECK-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3] 871; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3435973837,2454267027,268435456,1374389535] 872; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 873; CHECK-AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 874; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2 875; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 876; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 877; CHECK-AVX1-NEXT: vpsrld $2, %xmm3, %xmm3 878; CHECK-AVX1-NEXT: vpsrld $5, %xmm1, %xmm1 879; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7] 880; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 881; 
CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 882; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 883; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 884; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 885; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 886; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 887; CHECK-AVX1-NEXT: retq 888; 889; CHECK-AVX2-LABEL: test_urem_odd_even_poweroftwo: 890; CHECK-AVX2: # %bb.0: 891; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2454267027,268435456,1374389535] 892; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 893; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm3 894; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 895; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm4, %xmm2 896; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 897; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 898; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 899; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 900; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 901; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 902; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 903; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 904; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 905; CHECK-AVX2-NEXT: retq 906; 907; CHECK-AVX512VL-LABEL: test_urem_odd_even_poweroftwo: 908; CHECK-AVX512VL: # %bb.0: 909; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 910; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 911; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 912; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 913; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 914; CHECK-AVX512VL-NEXT: retq 915 %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 16, i32 100> 916 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 917 %ret = zext <4 x i1> %cmp to <4 x i32> 918 ret <4 x i32> %ret 919} 920 921;------------------------------------------------------------------------------; 922 
923; One one divisor in odd divisor 924define <4 x i32> @test_urem_odd_one(<4 x i32> %X) nounwind { 925; CHECK-SSE2-LABEL: test_urem_odd_one: 926; CHECK-SSE2: # %bb.0: 927; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837] 928; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 929; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0 930; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 931; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 932; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] 933; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 934; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm0 935; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 936; CHECK-SSE2-NEXT: pandn {{.*}}(%rip), %xmm0 937; CHECK-SSE2-NEXT: retq 938; 939; CHECK-SSE41-LABEL: test_urem_odd_one: 940; CHECK-SSE41: # %bb.0: 941; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0 942; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,4294967295,858993459] 943; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 944; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 945; CHECK-SSE41-NEXT: psrld $31, %xmm0 946; CHECK-SSE41-NEXT: retq 947; 948; CHECK-AVX1-LABEL: test_urem_odd_one: 949; CHECK-AVX1: # %bb.0: 950; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 951; CHECK-AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 952; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 953; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 954; CHECK-AVX1-NEXT: retq 955; 956; CHECK-AVX2-LABEL: test_urem_odd_one: 957; CHECK-AVX2: # %bb.0: 958; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837] 959; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 960; CHECK-AVX2-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 961; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 962; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 963; CHECK-AVX2-NEXT: retq 964; 965; CHECK-AVX512VL-LABEL: test_urem_odd_one: 966; CHECK-AVX512VL: # %bb.0: 967; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, 
%xmm0 968; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 969; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 970; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 971; CHECK-AVX512VL-NEXT: retq 972 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 1, i32 5> 973 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 974 %ret = zext <4 x i1> %cmp to <4 x i32> 975 ret <4 x i32> %ret 976} 977 978; One one divisor in even divisor 979define <4 x i32> @test_urem_even_one(<4 x i32> %X) nounwind { 980; CHECK-SSE2-LABEL: test_urem_even_one: 981; CHECK-SSE2: # %bb.0: 982; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 983; CHECK-SSE2-NEXT: psrld $1, %xmm1 984; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 985; CHECK-SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] 986; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2 987; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] 988; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 989; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm1 990; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 991; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 992; CHECK-SSE2-NEXT: psrld $2, %xmm2 993; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 994; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm1 995; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 996; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3] 997; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2 998; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 999; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1000; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 1001; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1002; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1003; CHECK-SSE2-NEXT: psrld $31, %xmm0 1004; CHECK-SSE2-NEXT: retq 1005; 1006; CHECK-SSE41-LABEL: test_urem_even_one: 1007; CHECK-SSE41: # %bb.0: 1008; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 1009; CHECK-SSE41-NEXT: psrld $1, %xmm1 1010; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1011; 
CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 1012; CHECK-SSE41-NEXT: pmuludq {{.*}}(%rip), %xmm1 1013; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1014; CHECK-SSE41-NEXT: pmuludq {{.*}}(%rip), %xmm2 1015; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 1016; CHECK-SSE41-NEXT: psrld $2, %xmm2 1017; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7] 1018; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 1019; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 1020; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 1021; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1022; CHECK-SSE41-NEXT: psrld $31, %xmm0 1023; CHECK-SSE41-NEXT: retq 1024; 1025; CHECK-AVX1-LABEL: test_urem_even_one: 1026; CHECK-AVX1: # %bb.0: 1027; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1 1028; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 1029; CHECK-AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm2, %xmm2 1030; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1031; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1032; CHECK-AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm1, %xmm1 1033; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1034; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1 1035; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 1036; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1037; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1038; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1039; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1040; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1041; CHECK-AVX1-NEXT: retq 1042; 1043; CHECK-AVX2-LABEL: test_urem_even_one: 1044; CHECK-AVX2: # %bb.0: 1045; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm1 1046; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1047; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027] 1048; CHECK-AVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 
1049; CHECK-AVX2-NEXT: vpmuludq {{.*}}(%rip), %xmm1, %xmm1 1050; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1051; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 1052; CHECK-AVX2-NEXT: vpsrld $2, %xmm1, %xmm1 1053; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] 1054; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1055; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1056; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1057; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1058; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1059; CHECK-AVX2-NEXT: retq 1060; 1061; CHECK-AVX512VL-LABEL: test_urem_even_one: 1062; CHECK-AVX512VL: # %bb.0: 1063; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0 1064; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0 1065; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 1066; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1067; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1068; CHECK-AVX512VL-NEXT: retq 1069 %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 1, i32 14> 1070 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1071 %ret = zext <4 x i1> %cmp to <4 x i32> 1072 ret <4 x i32> %ret 1073} 1074 1075; One one divisor in odd+even divisor 1076define <4 x i32> @test_urem_odd_even_one(<4 x i32> %X) nounwind { 1077; CHECK-SSE2-LABEL: test_urem_odd_even_one: 1078; CHECK-SSE2: # %bb.0: 1079; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,2454267027,0,1374389535] 1080; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 1081; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 1082; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] 1083; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3 1084; CHECK-SSE2-NEXT: psrld $1, %xmm3 1085; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[3,3] 1086; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1087; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm1 1088; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] 1089; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = 
xmm2[0],xmm3[0],xmm2[1],xmm3[1] 1090; CHECK-SSE2-NEXT: psrld $2, %xmm2 1091; CHECK-SSE2-NEXT: psrld $5, %xmm1 1092; CHECK-SSE2-NEXT: movaps %xmm0, %xmm3 1093; CHECK-SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1] 1094; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[3,3] 1095; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [5,14,1,100] 1096; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3] 1097; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm4 1098; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[0,2,2,3] 1099; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3 1100; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3] 1101; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1102; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0 1103; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1104; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1105; CHECK-SSE2-NEXT: psrld $31, %xmm0 1106; CHECK-SSE2-NEXT: retq 1107; 1108; CHECK-SSE41-LABEL: test_urem_odd_even_one: 1109; CHECK-SSE41: # %bb.0: 1110; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 1111; CHECK-SSE41-NEXT: psrld $1, %xmm1 1112; CHECK-SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3] 1113; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [3435973837,2454267027,0,1374389535] 1114; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 1115; CHECK-SSE41-NEXT: pmuludq %xmm1, %xmm3 1116; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2 1117; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 1118; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 1119; CHECK-SSE41-NEXT: psrld $2, %xmm1 1120; CHECK-SSE41-NEXT: psrld $5, %xmm3 1121; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm3[4,5,6,7] 1122; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm0[4,5],xmm3[6,7] 1123; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm3 1124; CHECK-SSE41-NEXT: psubd %xmm3, %xmm0 1125; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 1126; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1127; CHECK-SSE41-NEXT: psrld $31, %xmm0 1128; CHECK-SSE41-NEXT: retq 1129; 
1130; CHECK-AVX1-LABEL: test_urem_odd_even_one: 1131; CHECK-AVX1: # %bb.0: 1132; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1 1133; CHECK-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3] 1134; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3435973837,2454267027,0,1374389535] 1135; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 1136; CHECK-AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 1137; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2 1138; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1139; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1140; CHECK-AVX1-NEXT: vpsrld $2, %xmm2, %xmm2 1141; CHECK-AVX1-NEXT: vpsrld $5, %xmm1, %xmm1 1142; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] 1143; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 1144; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1145; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1146; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1147; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1148; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1149; CHECK-AVX1-NEXT: retq 1150; 1151; CHECK-AVX2-LABEL: test_urem_odd_even_one: 1152; CHECK-AVX2: # %bb.0: 1153; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2454267027,0,1374389535] 1154; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1155; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm3 1156; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 1157; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm4, %xmm2 1158; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 1159; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1160; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 1161; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 1162; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] 1163; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1164; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1165; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, 
%xmm1 1166; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1167; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1168; CHECK-AVX2-NEXT: retq 1169; 1170; CHECK-AVX512VL-LABEL: test_urem_odd_even_one: 1171; CHECK-AVX512VL: # %bb.0: 1172; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 1173; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 1174; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 1175; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1176; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1177; CHECK-AVX512VL-NEXT: retq 1178 %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 1, i32 100> 1179 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1180 %ret = zext <4 x i1> %cmp to <4 x i32> 1181 ret <4 x i32> %ret 1182} 1183 1184;------------------------------------------------------------------------------; 1185 1186; One INT_MIN divisor in odd divisor 1187define <4 x i32> @test_urem_odd_INT_MIN(<4 x i32> %X) nounwind { 1188; CHECK-SSE2-LABEL: test_urem_odd_INT_MIN: 1189; CHECK-SSE2: # %bb.0: 1190; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = <3435973837,u,2,u> 1191; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1 1192; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 1193; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1194; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2 1195; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] 1196; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1197; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm2 1198; CHECK-SSE2-NEXT: psrld $2, %xmm2 1199; CHECK-SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] 1200; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm1 1201; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1202; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1203; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2 1204; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 1205; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1206; CHECK-SSE2-NEXT: psubd %xmm1, 
%xmm0 1207; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1208; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1209; CHECK-SSE2-NEXT: psrld $31, %xmm0 1210; CHECK-SSE2-NEXT: retq 1211; 1212; CHECK-SSE41-LABEL: test_urem_odd_INT_MIN: 1213; CHECK-SSE41: # %bb.0: 1214; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1215; CHECK-SSE41-NEXT: pmuludq {{.*}}(%rip), %xmm1 1216; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = <3435973837,u,2,u> 1217; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2 1218; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1219; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1220; CHECK-SSE41-NEXT: psrld $2, %xmm1 1221; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7] 1222; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1 1223; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0 1224; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 1225; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1226; CHECK-SSE41-NEXT: psrld $31, %xmm0 1227; CHECK-SSE41-NEXT: retq 1228; 1229; CHECK-AVX1-LABEL: test_urem_odd_INT_MIN: 1230; CHECK-AVX1: # %bb.0: 1231; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1232; CHECK-AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm1, %xmm1 1233; CHECK-AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm2 1234; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1235; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1236; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1 1237; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7] 1238; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1239; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1240; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1241; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1242; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1243; CHECK-AVX1-NEXT: retq 1244; 1245; CHECK-AVX2-LABEL: test_urem_odd_INT_MIN: 1246; CHECK-AVX2: # %bb.0: 1247; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] 1248; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 
= [3435973837,3435973837,3435973837,3435973837] 1249; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 1250; CHECK-AVX2-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm2 1251; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1252; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] 1253; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 1254; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1255; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1256; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1257; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1258; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1259; CHECK-AVX2-NEXT: retq 1260; 1261; CHECK-AVX512VL-LABEL: test_urem_odd_INT_MIN: 1262; CHECK-AVX512VL: # %bb.0: 1263; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 1264; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 1265; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 1266; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1267; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1268; CHECK-AVX512VL-NEXT: retq 1269 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 2147483648, i32 5> 1270 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1271 %ret = zext <4 x i1> %cmp to <4 x i32> 1272 ret <4 x i32> %ret 1273} 1274 1275; One INT_MIN divisor in even divisor 1276define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind { 1277; CHECK-SSE2-LABEL: test_urem_even_INT_MIN: 1278; CHECK-SSE2: # %bb.0: 1279; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 1280; CHECK-SSE2-NEXT: psrld $1, %xmm1 1281; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 1282; CHECK-SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] 1283; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2 1284; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] 1285; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1286; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm1 1287; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 1288; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = 
xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1289; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1 1290; CHECK-SSE2-NEXT: psrld $2, %xmm1 1291; CHECK-SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] 1292; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2 1293; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 1294; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1295; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm1 1296; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1297; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1298; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 1299; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1300; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1301; CHECK-SSE2-NEXT: psrld $31, %xmm0 1302; CHECK-SSE2-NEXT: retq 1303; 1304; CHECK-SSE41-LABEL: test_urem_even_INT_MIN: 1305; CHECK-SSE41: # %bb.0: 1306; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 1307; CHECK-SSE41-NEXT: psrld $1, %xmm1 1308; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1309; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 1310; CHECK-SSE41-NEXT: pmuludq {{.*}}(%rip), %xmm1 1311; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1312; CHECK-SSE41-NEXT: pmuludq {{.*}}(%rip), %xmm2 1313; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 1314; CHECK-SSE41-NEXT: psrld $2, %xmm2 1315; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7] 1316; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 1317; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 1318; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 1319; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1320; CHECK-SSE41-NEXT: psrld $31, %xmm0 1321; CHECK-SSE41-NEXT: retq 1322; 1323; CHECK-AVX1-LABEL: test_urem_even_INT_MIN: 1324; CHECK-AVX1: # %bb.0: 1325; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1 1326; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 1327; CHECK-AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm2, %xmm2 1328; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 
1329; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1330; CHECK-AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm1, %xmm1 1331; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1332; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1 1333; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7] 1334; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1335; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1336; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1337; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1338; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1339; CHECK-AVX1-NEXT: retq 1340; 1341; CHECK-AVX2-LABEL: test_urem_even_INT_MIN: 1342; CHECK-AVX2: # %bb.0: 1343; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm1 1344; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1345; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027] 1346; CHECK-AVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 1347; CHECK-AVX2-NEXT: vpmuludq {{.*}}(%rip), %xmm1, %xmm1 1348; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1349; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 1350; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 1351; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1352; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1353; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1354; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1355; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1356; CHECK-AVX2-NEXT: retq 1357; 1358; CHECK-AVX512VL-LABEL: test_urem_even_INT_MIN: 1359; CHECK-AVX512VL: # %bb.0: 1360; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 1361; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 1362; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 1363; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1364; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1365; CHECK-AVX512VL-NEXT: retq 1366 %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 2147483648, i32 
14> 1367 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1368 %ret = zext <4 x i1> %cmp to <4 x i32> 1369 ret <4 x i32> %ret 1370} 1371 1372; One INT_MIN divisor in odd+even divisor 1373define <4 x i32> @test_urem_odd_even_INT_MIN(<4 x i32> %X) nounwind { 1374; CHECK-SSE2-LABEL: test_urem_odd_even_INT_MIN: 1375; CHECK-SSE2: # %bb.0: 1376; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,2454267027,2,1374389535] 1377; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 1378; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 1379; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] 1380; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3 1381; CHECK-SSE2-NEXT: psrld $1, %xmm3 1382; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[3,3] 1383; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1384; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm1 1385; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3] 1386; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 1387; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 1388; CHECK-SSE2-NEXT: psrld $2, %xmm3 1389; CHECK-SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm3[0],xmm2[1] 1390; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [5,14,2147483648,100] 1391; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm2 1392; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 1393; CHECK-SSE2-NEXT: psrld $5, %xmm1 1394; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3] 1395; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3] 1396; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm1 1397; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1398; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1399; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 1400; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1401; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1402; CHECK-SSE2-NEXT: psrld $31, %xmm0 1403; CHECK-SSE2-NEXT: retq 1404; 1405; CHECK-SSE41-LABEL: test_urem_odd_even_INT_MIN: 1406; CHECK-SSE41: # %bb.0: 1407; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 1408; CHECK-SSE41-NEXT: psrld $1, 
%xmm1 1409; CHECK-SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3] 1410; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [3435973837,2454267027,2,1374389535] 1411; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 1412; CHECK-SSE41-NEXT: pmuludq %xmm1, %xmm3 1413; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2 1414; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] 1415; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2 1416; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] 1417; CHECK-SSE41-NEXT: psrld $2, %xmm2 1418; CHECK-SSE41-NEXT: psrld $5, %xmm3 1419; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0,1,2,3],xmm3[4,5,6,7] 1420; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7] 1421; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] 1422; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 1423; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 1424; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 1425; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1426; CHECK-SSE41-NEXT: psrld $31, %xmm0 1427; CHECK-SSE41-NEXT: retq 1428; 1429; CHECK-AVX1-LABEL: test_urem_odd_even_INT_MIN: 1430; CHECK-AVX1: # %bb.0: 1431; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1 1432; CHECK-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3] 1433; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3435973837,2454267027,2,1374389535] 1434; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 1435; CHECK-AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 1436; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2 1437; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1438; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1439; CHECK-AVX1-NEXT: vpsrld $2, %xmm3, %xmm3 1440; CHECK-AVX1-NEXT: vpsrld $5, %xmm1, %xmm1 1441; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7] 1442; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 1443; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = 
xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 1444; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1445; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1446; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1447; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1448; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1449; CHECK-AVX1-NEXT: retq 1450; 1451; CHECK-AVX2-LABEL: test_urem_odd_even_INT_MIN: 1452; CHECK-AVX2: # %bb.0: 1453; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2454267027,2,1374389535] 1454; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1455; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm3 1456; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 1457; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm4, %xmm2 1458; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 1459; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1460; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 1461; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 1462; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1463; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1464; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1465; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1466; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1467; CHECK-AVX2-NEXT: retq 1468; 1469; CHECK-AVX512VL-LABEL: test_urem_odd_even_INT_MIN: 1470; CHECK-AVX512VL: # %bb.0: 1471; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 1472; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 1473; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 1474; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1475; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1476; CHECK-AVX512VL-NEXT: retq 1477 %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 2147483648, i32 100> 1478 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1479 %ret = zext <4 x i1> %cmp to <4 x i32> 1480 ret <4 x i32> %ret 1481} 1482 1483;==============================================================================; 1484 1485; One 
all-ones divisor and one power-of-two divisor in odd divisor 1486define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind { 1487; CHECK-SSE2-LABEL: test_urem_odd_allones_and_poweroftwo: 1488; CHECK-SSE2: # %bb.0: 1489; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,3435973837] 1490; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 1491; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 1492; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] 1493; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1494; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1495; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3 1496; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3] 1497; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1498; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1 1499; CHECK-SSE2-NEXT: psrld $2, %xmm1 1500; CHECK-SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1] 1501; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [5,4294967295,16,5] 1502; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm2 1503; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 1504; CHECK-SSE2-NEXT: psrld $31, %xmm3 1505; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3] 1506; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3] 1507; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm1 1508; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1509; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1510; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 1511; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1512; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1513; CHECK-SSE2-NEXT: psrld $31, %xmm0 1514; CHECK-SSE2-NEXT: retq 1515; 1516; CHECK-SSE41-LABEL: test_urem_odd_allones_and_poweroftwo: 1517; CHECK-SSE41: # %bb.0: 1518; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,3435973837] 1519; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1520; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1521; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm3
1522; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1 1523; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1524; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2 1525; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] 1526; CHECK-SSE41-NEXT: psrld $2, %xmm2 1527; CHECK-SSE41-NEXT: psrld $31, %xmm3 1528; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7] 1529; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7] 1530; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] 1531; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 1532; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 1533; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 1534; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1535; CHECK-SSE41-NEXT: psrld $31, %xmm0 1536; CHECK-SSE41-NEXT: retq 1537; 1538; CHECK-AVX1-LABEL: test_urem_odd_allones_and_poweroftwo: 1539; CHECK-AVX1: # %bb.0: 1540; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,3435973837] 1541; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1542; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1543; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 1544; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 1545; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1546; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 1547; CHECK-AVX1-NEXT: vpsrld $2, %xmm3, %xmm3 1548; CHECK-AVX1-NEXT: vpsrld $31, %xmm2, %xmm2 1549; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7] 1550; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7] 1551; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 1552; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1553; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1554; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1555; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1556; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1557; CHECK-AVX1-NEXT: retq 1558; 
1559; CHECK-AVX2-LABEL: test_urem_odd_allones_and_poweroftwo: 1560; CHECK-AVX2: # %bb.0: 1561; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,3435973837] 1562; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1563; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1564; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 1565; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 1566; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1567; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 1568; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 1569; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1570; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1571; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1572; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1573; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1574; CHECK-AVX2-NEXT: retq 1575; 1576; CHECK-AVX512VL-LABEL: test_urem_odd_allones_and_poweroftwo: 1577; CHECK-AVX512VL: # %bb.0: 1578; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 1579; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 1580; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 1581; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1582; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1583; CHECK-AVX512VL-NEXT: retq 1584 %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 5> 1585 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1586 %ret = zext <4 x i1> %cmp to <4 x i32> 1587 ret <4 x i32> %ret 1588} 1589 1590; One all-ones divisor and one power-of-two divisor in even divisor 1591define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind { 1592; CHECK-SSE2-LABEL: test_urem_even_allones_and_poweroftwo: 1593; CHECK-SSE2: # %bb.0: 1594; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 1595; CHECK-SSE2-NEXT: psrld $1, %xmm1 1596; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 1597; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[3,3] 1598;
CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,2147483649,268435456,2454267027] 1599; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 1600; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm4 1601; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,3,2,3] 1602; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3] 1603; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm1 1604; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 1605; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1606; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm2 1607; CHECK-SSE2-NEXT: psrld $2, %xmm2 1608; CHECK-SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] 1609; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [14,4294967295,16,14] 1610; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm1 1611; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1612; CHECK-SSE2-NEXT: psrld $31, %xmm4 1613; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm2[3,3] 1614; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] 1615; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm2 1616; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 1617; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1618; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0 1619; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1620; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1621; CHECK-SSE2-NEXT: psrld $31, %xmm0 1622; CHECK-SSE2-NEXT: retq 1623; 1624; CHECK-SSE41-LABEL: test_urem_even_allones_and_poweroftwo: 1625; CHECK-SSE41: # %bb.0: 1626; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 1627; CHECK-SSE41-NEXT: psrld $1, %xmm1 1628; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7] 1629; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1630; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,2147483649,268435456,2454267027] 1631; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 1632; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm4 1633; CHECK-SSE41-NEXT: pmuludq %xmm3, %xmm1 1634; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1635; 
CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2 1636; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7] 1637; CHECK-SSE41-NEXT: psrld $2, %xmm2 1638; CHECK-SSE41-NEXT: psrld $31, %xmm4 1639; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm2[4,5,6,7] 1640; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7] 1641; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7] 1642; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 1643; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 1644; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 1645; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1646; CHECK-SSE41-NEXT: psrld $31, %xmm0 1647; CHECK-SSE41-NEXT: retq 1648; 1649; CHECK-AVX1-LABEL: test_urem_even_allones_and_poweroftwo: 1650; CHECK-AVX1: # %bb.0: 1651; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1 1652; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7] 1653; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1654; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2454267027,2147483649,268435456,2454267027] 1655; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 1656; CHECK-AVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm2 1657; CHECK-AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 1658; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1659; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 1660; CHECK-AVX1-NEXT: vpsrld $2, %xmm3, %xmm3 1661; CHECK-AVX1-NEXT: vpsrld $31, %xmm2, %xmm2 1662; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7] 1663; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7] 1664; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 1665; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1666; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1667; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1668; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1669; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1670; 
CHECK-AVX1-NEXT: retq 1671; 1672; CHECK-AVX2-LABEL: test_urem_even_allones_and_poweroftwo: 1673; CHECK-AVX2: # %bb.0: 1674; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,2147483649,268435456,2454267027] 1675; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1676; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm3 1677; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 1678; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm4, %xmm2 1679; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 1680; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1681; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 1682; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 1683; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1684; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1685; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1686; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1687; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1688; CHECK-AVX2-NEXT: retq 1689; 1690; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_poweroftwo: 1691; CHECK-AVX512VL: # %bb.0: 1692; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 1693; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 1694; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 1695; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1696; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1697; CHECK-AVX512VL-NEXT: retq 1698 %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 14> 1699 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1700 %ret = zext <4 x i1> %cmp to <4 x i32> 1701 ret <4 x i32> %ret 1702} 1703 1704; One all-ones divisor and one power-of-two divisor in odd+even divisor 1705define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwind { 1706; CHECK-SSE2-LABEL: test_urem_odd_even_allones_and_poweroftwo: 1707; CHECK-SSE2: # %bb.0: 1708; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,1374389535] 1709;
CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 1710; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 1711; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] 1712; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1713; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1714; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3 1715; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3] 1716; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1717; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1 1718; CHECK-SSE2-NEXT: psrld $2, %xmm1 1719; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3] 1720; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [5,4294967295,16,100] 1721; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1 1722; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1723; CHECK-SSE2-NEXT: movdqa %xmm3, %xmm4 1724; CHECK-SSE2-NEXT: psrld $5, %xmm4 1725; CHECK-SSE2-NEXT: psrld $31, %xmm3 1726; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm4[3,3] 1727; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 1728; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2 1729; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 1730; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1731; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0 1732; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1733; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1734; CHECK-SSE2-NEXT: psrld $31, %xmm0 1735; CHECK-SSE2-NEXT: retq 1736; 1737; CHECK-SSE41-LABEL: test_urem_odd_even_allones_and_poweroftwo: 1738; CHECK-SSE41: # %bb.0: 1739; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,1374389535] 1740; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1741; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1742; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm3 1743; CHECK-SSE41-NEXT: movdqa %xmm3, %xmm2 1744; CHECK-SSE41-NEXT: psrld $5, %xmm2 1745; CHECK-SSE41-NEXT: psrld $31, %xmm3 1746; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7] 1747; CHECK-SSE41-NEXT: pmuludq %xmm0, 
%xmm1 1748; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] 1749; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1750; CHECK-SSE41-NEXT: psrld $2, %xmm1 1751; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] 1752; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 1753; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1 1754; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0 1755; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 1756; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1757; CHECK-SSE41-NEXT: psrld $31, %xmm0 1758; CHECK-SSE41-NEXT: retq 1759; 1760; CHECK-AVX1-LABEL: test_urem_odd_even_allones_and_poweroftwo: 1761; CHECK-AVX1: # %bb.0: 1762; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,1374389535] 1763; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1764; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1765; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 1766; CHECK-AVX1-NEXT: vpsrld $5, %xmm2, %xmm3 1767; CHECK-AVX1-NEXT: vpsrld $31, %xmm2, %xmm2 1768; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7] 1769; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 1770; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] 1771; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1772; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1 1773; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7] 1774; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 1775; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1776; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1777; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1778; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1779; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1780; CHECK-AVX1-NEXT: retq 1781; 1782; CHECK-AVX2-LABEL: test_urem_odd_even_allones_and_poweroftwo: 1783; CHECK-AVX2: # %bb.0: 1784; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,1374389535] 1785; 
CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1786; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1787; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 1788; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 1789; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1790; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 1791; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 1792; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1793; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1794; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1795; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1796; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1797; CHECK-AVX2-NEXT: retq 1798; 1799; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_and_poweroftwo: 1800; CHECK-AVX512VL: # %bb.0: 1801; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 1802; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 1803; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 1804; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1805; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1806; CHECK-AVX512VL-NEXT: retq 1807 %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 100> 1808 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1809 %ret = zext <4 x i1> %cmp to <4 x i32> 1810 ret <4 x i32> %ret 1811} 1812 1813;------------------------------------------------------------------------------; 1814 1815; One all-ones divisor and one one divisor in odd divisor 1816define <4 x i32> @test_urem_odd_allones_and_one(<4 x i32> %X) nounwind { 1817; CHECK-SSE2-LABEL: test_urem_odd_allones_and_one: 1818; CHECK-SSE2: # %bb.0: 1819; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,4294967295,0,3435973837] 1820; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] 1821; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0 1822; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1823; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1824; CHECK-SSE2-NEXT: 
pmuludq %xmm2, %xmm1 1825; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1826; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1827; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm0 1828; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 1829; CHECK-SSE2-NEXT: pandn {{.*}}(%rip), %xmm0 1830; CHECK-SSE2-NEXT: retq 1831; 1832; CHECK-SSE41-LABEL: test_urem_odd_allones_and_one: 1833; CHECK-SSE41: # %bb.0: 1834; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0 1835; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993459,1,4294967295,858993459] 1836; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 1837; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1838; CHECK-SSE41-NEXT: psrld $31, %xmm0 1839; CHECK-SSE41-NEXT: retq 1840; 1841; CHECK-AVX-LABEL: test_urem_odd_allones_and_one: 1842; CHECK-AVX: # %bb.0: 1843; CHECK-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 1844; CHECK-AVX-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 1845; CHECK-AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1846; CHECK-AVX-NEXT: vpsrld $31, %xmm0, %xmm0 1847; CHECK-AVX-NEXT: retq 1848 %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 5> 1849 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1850 %ret = zext <4 x i1> %cmp to <4 x i32> 1851 ret <4 x i32> %ret 1852} 1853 1854; One all-ones divisor and one one divisor in even divisor 1855define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind { 1856; CHECK-SSE2-LABEL: test_urem_even_allones_and_one: 1857; CHECK-SSE2: # %bb.0: 1858; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 1859; CHECK-SSE2-NEXT: psrld $1, %xmm1 1860; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 1861; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[3,3] 1862; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,2147483649,0,2454267027] 1863; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 1864; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm4 1865; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,3,2,3] 1866; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3] 1867; 
CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm1 1868; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 1869; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1870; CHECK-SSE2-NEXT: psrld $2, %xmm1 1871; CHECK-SSE2-NEXT: psrld $31, %xmm4 1872; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,0],xmm1[3,3] 1873; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [14,4294967295,1,14] 1874; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 1875; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3 1876; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 1877; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3] 1878; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1 1879; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1880; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1881; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0 1882; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1883; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1884; CHECK-SSE2-NEXT: psrld $31, %xmm0 1885; CHECK-SSE2-NEXT: retq 1886; 1887; CHECK-SSE41-LABEL: test_urem_even_allones_and_one: 1888; CHECK-SSE41: # %bb.0: 1889; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 1890; CHECK-SSE41-NEXT: psrld $1, %xmm1 1891; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7] 1892; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1893; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,2147483649,0,2454267027] 1894; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 1895; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm4 1896; CHECK-SSE41-NEXT: pmuludq %xmm3, %xmm1 1897; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1898; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7] 1899; CHECK-SSE41-NEXT: psrld $2, %xmm1 1900; CHECK-SSE41-NEXT: psrld $31, %xmm4 1901; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm1[0,1],xmm4[2,3],xmm1[4,5,6,7] 1902; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm0[4,5],xmm4[6,7] 1903; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm4 
1904; CHECK-SSE41-NEXT: psubd %xmm4, %xmm0 1905; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 1906; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 1907; CHECK-SSE41-NEXT: psrld $31, %xmm0 1908; CHECK-SSE41-NEXT: retq 1909; 1910; CHECK-AVX1-LABEL: test_urem_even_allones_and_one: 1911; CHECK-AVX1: # %bb.0: 1912; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1 1913; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7] 1914; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1915; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2454267027,2147483649,0,2454267027] 1916; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 1917; CHECK-AVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm2 1918; CHECK-AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 1919; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1920; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 1921; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1 1922; CHECK-AVX1-NEXT: vpsrld $31, %xmm2, %xmm2 1923; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5,6,7] 1924; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 1925; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1926; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1927; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1928; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1929; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 1930; CHECK-AVX1-NEXT: retq 1931; 1932; CHECK-AVX2-LABEL: test_urem_even_allones_and_one: 1933; CHECK-AVX2: # %bb.0: 1934; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,2147483649,0,2454267027] 1935; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1936; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm3 1937; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 1938; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm4, %xmm2 1939; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 1940; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1941; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = 
xmm1[0],xmm2[1],xmm1[2],xmm2[3] 1942; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 1943; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] 1944; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 1945; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 1946; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1947; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1948; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 1949; CHECK-AVX2-NEXT: retq 1950; 1951; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_one: 1952; CHECK-AVX512VL: # %bb.0: 1953; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 1954; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 1955; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 1956; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 1957; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 1958; CHECK-AVX512VL-NEXT: retq 1959 %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 1, i32 14> 1960 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 1961 %ret = zext <4 x i1> %cmp to <4 x i32> 1962 ret <4 x i32> %ret 1963} 1964 1965; One all-ones divisor and one one divisor in odd+even divisor 1966define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind { 1967; CHECK-SSE2-LABEL: test_urem_odd_even_allones_and_one: 1968; CHECK-SSE2: # %bb.0: 1969; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,2147483649,0,1374389535] 1970; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1971; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1972; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm3 1973; CHECK-SSE2-NEXT: movdqa %xmm3, %xmm2 1974; CHECK-SSE2-NEXT: psrld $5, %xmm2 1975; CHECK-SSE2-NEXT: psrld $31, %xmm3 1976; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm2[3,3] 1977; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [5,4294967295,1,100] 1978; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 1979; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm4 1980; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3] 1981; 
CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1 1982; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1983; CHECK-SSE2-NEXT: psrld $2, %xmm1 1984; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3] 1985; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1 1986; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 1987; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1988; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0 1989; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 1990; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 1991; CHECK-SSE2-NEXT: psrld $31, %xmm0 1992; CHECK-SSE2-NEXT: retq 1993; 1994; CHECK-SSE41-LABEL: test_urem_odd_even_allones_and_one: 1995; CHECK-SSE41: # %bb.0: 1996; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,2147483649,0,1374389535] 1997; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 1998; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 1999; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm3 2000; CHECK-SSE41-NEXT: movdqa %xmm3, %xmm2 2001; CHECK-SSE41-NEXT: psrld $5, %xmm2 2002; CHECK-SSE41-NEXT: psrld $31, %xmm3 2003; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7] 2004; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1 2005; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2006; CHECK-SSE41-NEXT: psrld $2, %xmm1 2007; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 2008; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 2009; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1 2010; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0 2011; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 2012; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 2013; CHECK-SSE41-NEXT: psrld $31, %xmm0 2014; CHECK-SSE41-NEXT: retq 2015; 2016; CHECK-AVX1-LABEL: test_urem_odd_even_allones_and_one: 2017; CHECK-AVX1: # %bb.0: 2018; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,0,1374389535] 2019; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2020; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 
2021; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 2022; CHECK-AVX1-NEXT: vpsrld $5, %xmm2, %xmm3 2023; CHECK-AVX1-NEXT: vpsrld $31, %xmm2, %xmm2 2024; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7] 2025; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 2026; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2027; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1 2028; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 2029; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 2030; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2031; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2032; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2033; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2034; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 2035; CHECK-AVX1-NEXT: retq 2036; 2037; CHECK-AVX2-LABEL: test_urem_odd_even_allones_and_one: 2038; CHECK-AVX2: # %bb.0: 2039; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,0,1374389535] 2040; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2041; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2042; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 2043; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 2044; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2045; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 2046; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 2047; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] 2048; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2049; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2050; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 2051; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2052; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 2053; CHECK-AVX2-NEXT: retq 2054; 2055; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_and_one: 2056; CHECK-AVX512VL: # %bb.0: 2057; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2058; CHECK-AVX512VL-NEXT: vprorvd 
{{.*}}(%rip), %xmm0, %xmm0 2059; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 2060; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2061; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 2062; CHECK-AVX512VL-NEXT: retq 2063 %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 100> 2064 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 2065 %ret = zext <4 x i1> %cmp to <4 x i32> 2066 ret <4 x i32> %ret 2067} 2068 2069;------------------------------------------------------------------------------; 2070 2071; One power-of-two divisor and one one divisor in odd divisor 2072define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind { 2073; CHECK-SSE2-LABEL: test_urem_odd_poweroftwo_and_one: 2074; CHECK-SSE2: # %bb.0: 2075; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,268435456,0,3435973837] 2076; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 2077; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 2078; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] 2079; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2080; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2081; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3 2082; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3] 2083; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 2084; CHECK-SSE2-NEXT: psrld $2, %xmm2 2085; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm2[3,3] 2086; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [5,16,1,5] 2087; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3] 2088; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm4 2089; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3] 2090; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3] 2091; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 2092; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] 2093; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 2094; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0 2095; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 2096; CHECK-SSE2-NEXT: pcmpeqd
%xmm1, %xmm0 2097; CHECK-SSE2-NEXT: psrld $31, %xmm0 2098; CHECK-SSE2-NEXT: retq 2099; 2100; CHECK-SSE41-LABEL: test_urem_odd_poweroftwo_and_one: 2101; CHECK-SSE41: # %bb.0: 2102; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,268435456,0,3435973837] 2103; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2104; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2105; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm3 2106; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1 2107; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2108; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 2109; CHECK-SSE41-NEXT: psrld $2, %xmm1 2110; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5,6,7] 2111; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 2112; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1 2113; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0 2114; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 2115; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 2116; CHECK-SSE41-NEXT: psrld $31, %xmm0 2117; CHECK-SSE41-NEXT: retq 2118; 2119; CHECK-AVX1-LABEL: test_urem_odd_poweroftwo_and_one: 2120; CHECK-AVX1: # %bb.0: 2121; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,268435456,0,3435973837] 2122; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2123; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2124; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 2125; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 2126; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2127; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 2128; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1 2129; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5,6,7] 2130; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 2131; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2132; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2133; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2134; 
CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2135; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 2136; CHECK-AVX1-NEXT: retq 2137; 2138; CHECK-AVX2-LABEL: test_urem_odd_poweroftwo_and_one: 2139; CHECK-AVX2: # %bb.0: 2140; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,268435456,0,3435973837] 2141; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2142; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2143; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 2144; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 2145; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2146; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 2147; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 2148; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] 2149; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2150; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2151; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 2152; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2153; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 2154; CHECK-AVX2-NEXT: retq 2155; 2156; CHECK-AVX512VL-LABEL: test_urem_odd_poweroftwo_and_one: 2157; CHECK-AVX512VL: # %bb.0: 2158; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2159; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 2160; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 2161; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2162; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 2163; CHECK-AVX512VL-NEXT: retq 2164 %urem = urem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 5> 2165 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 2166 %ret = zext <4 x i1> %cmp to <4 x i32> 2167 ret <4 x i32> %ret 2168} 2169 2170; One power-of-two divisor and one one divisor in even divisor 2171define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind { 2172; CHECK-SSE2-LABEL: test_urem_even_poweroftwo_and_one: 2173; CHECK-SSE2: # %bb.0: 2174; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 2175;
CHECK-SSE2-NEXT: psrld $1, %xmm1 2176; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 2177; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[3,3] 2178; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,268435456,0,2454267027] 2179; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 2180; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm4 2181; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,3,2,3] 2182; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3] 2183; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm1 2184; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] 2185; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 2186; CHECK-SSE2-NEXT: psrld $2, %xmm1 2187; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[3,3] 2188; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [14,16,1,14] 2189; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] 2190; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3 2191; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] 2192; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3] 2193; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1 2194; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 2195; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 2196; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0 2197; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 2198; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 2199; CHECK-SSE2-NEXT: psrld $31, %xmm0 2200; CHECK-SSE2-NEXT: retq 2201; 2202; CHECK-SSE41-LABEL: test_urem_even_poweroftwo_and_one: 2203; CHECK-SSE41: # %bb.0: 2204; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 2205; CHECK-SSE41-NEXT: psrld $1, %xmm1 2206; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7] 2207; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2208; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,268435456,0,2454267027] 2209; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 2210; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm4 2211; CHECK-SSE41-NEXT: pmuludq %xmm3, %xmm1 2212; CHECK-SSE41-NEXT: pshufd 
{{.*#+}} xmm1 = xmm1[1,1,3,3] 2213; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7] 2214; CHECK-SSE41-NEXT: psrld $2, %xmm1 2215; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5,6,7] 2216; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 2217; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1 2218; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0 2219; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 2220; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 2221; CHECK-SSE41-NEXT: psrld $31, %xmm0 2222; CHECK-SSE41-NEXT: retq 2223; 2224; CHECK-AVX1-LABEL: test_urem_even_poweroftwo_and_one: 2225; CHECK-AVX1: # %bb.0: 2226; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1 2227; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7] 2228; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2229; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2454267027,268435456,0,2454267027] 2230; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 2231; CHECK-AVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm2 2232; CHECK-AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 2233; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2234; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 2235; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1 2236; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5,6,7] 2237; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 2238; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2239; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2240; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2241; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2242; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 2243; CHECK-AVX1-NEXT: retq 2244; 2245; CHECK-AVX2-LABEL: test_urem_even_poweroftwo_and_one: 2246; CHECK-AVX2: # %bb.0: 2247; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,268435456,0,2454267027] 2248; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2249; 
CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm3 2250; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 2251; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm4, %xmm2 2252; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 2253; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2254; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 2255; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 2256; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] 2257; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2258; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2259; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 2260; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2261; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 2262; CHECK-AVX2-NEXT: retq 2263; 2264; CHECK-AVX512VL-LABEL: test_urem_even_poweroftwo_and_one: 2265; CHECK-AVX512VL: # %bb.0: 2266; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2267; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 2268; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 2269; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2270; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 2271; CHECK-AVX512VL-NEXT: retq 2272 %urem = urem <4 x i32> %X, <i32 14, i32 16, i32 1, i32 14> 2273 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 2274 %ret = zext <4 x i1> %cmp to <4 x i32> 2275 ret <4 x i32> %ret 2276} 2277 2278; One power-of-two divisor and one one divisor in odd+even divisor 2279define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind { 2280; CHECK-SSE2-LABEL: test_urem_odd_even_poweroftwo_and_one: 2281; CHECK-SSE2: # %bb.0: 2282; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,268435456,0,1374389535] 2283; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2284; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2285; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm3 2286; CHECK-SSE2-NEXT: movdqa %xmm3, %xmm2 2287; CHECK-SSE2-NEXT: psrld $5, %xmm2 2288;
CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm2[3,3] 2289; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [5,16,1,100] 2290; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3] 2291; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm4 2292; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3] 2293; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1 2294; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2295; CHECK-SSE2-NEXT: psrld $2, %xmm1 2296; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3] 2297; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1 2298; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 2299; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 2300; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0 2301; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 2302; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 2303; CHECK-SSE2-NEXT: psrld $31, %xmm0 2304; CHECK-SSE2-NEXT: retq 2305; 2306; CHECK-SSE41-LABEL: test_urem_odd_even_poweroftwo_and_one: 2307; CHECK-SSE41: # %bb.0: 2308; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,268435456,0,1374389535] 2309; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2310; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2311; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm3 2312; CHECK-SSE41-NEXT: movdqa %xmm3, %xmm2 2313; CHECK-SSE41-NEXT: psrld $5, %xmm2 2314; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 2315; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1 2316; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2317; CHECK-SSE41-NEXT: psrld $2, %xmm1 2318; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 2319; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 2320; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1 2321; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0 2322; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 2323; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 2324; CHECK-SSE41-NEXT: psrld $31, %xmm0 2325; CHECK-SSE41-NEXT: retq 2326; 2327; CHECK-AVX1-LABEL: 
test_urem_odd_even_poweroftwo_and_one: 2328; CHECK-AVX1: # %bb.0: 2329; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,268435456,0,1374389535] 2330; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2331; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2332; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 2333; CHECK-AVX1-NEXT: vpsrld $5, %xmm2, %xmm3 2334; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7] 2335; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 2336; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2337; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1 2338; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 2339; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] 2340; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2341; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2342; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2343; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2344; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 2345; CHECK-AVX1-NEXT: retq 2346; 2347; CHECK-AVX2-LABEL: test_urem_odd_even_poweroftwo_and_one: 2348; CHECK-AVX2: # %bb.0: 2349; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,268435456,0,1374389535] 2350; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2351; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2352; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 2353; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 2354; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2355; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 2356; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 2357; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] 2358; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2359; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2360; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 2361; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2362; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 
2363; CHECK-AVX2-NEXT: retq 2364; 2365; CHECK-AVX512VL-LABEL: test_urem_odd_even_poweroftwo_and_one: 2366; CHECK-AVX512VL: # %bb.0: 2367; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2368; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 2369; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 2370; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2371; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 2372; CHECK-AVX512VL-NEXT: retq 2373 %urem = urem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 100> 2374 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 2375 %ret = zext <4 x i1> %cmp to <4 x i32> 2376 ret <4 x i32> %ret 2377} 2378 2379;------------------------------------------------------------------------------; 2380 2381define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind { 2382; CHECK-SSE2-LABEL: test_urem_odd_allones_and_poweroftwo_and_one: 2383; CHECK-SSE2: # %bb.0: 2384; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,0] 2385; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 2386; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 2387; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] 2388; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2389; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2390; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3 2391; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3] 2392; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 2393; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1 2394; CHECK-SSE2-NEXT: psrld $2, %xmm1 2395; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3] 2396; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [5,4294967295,16,1] 2397; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1 2398; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 2399; CHECK-SSE2-NEXT: psrld $31, %xmm3 2400; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[3,3] 2401; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 2402; CHECK-SSE2-NEXT: pmuludq 
%xmm3, %xmm2 2403; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 2404; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 2405; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0 2406; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 2407; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 2408; CHECK-SSE2-NEXT: psrld $31, %xmm0 2409; CHECK-SSE2-NEXT: retq 2410; 2411; CHECK-SSE41-LABEL: test_urem_odd_allones_and_poweroftwo_and_one: 2412; CHECK-SSE41: # %bb.0: 2413; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,0] 2414; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm2 2415; CHECK-SSE41-NEXT: pmuludq %xmm1, %xmm2 2416; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[3,3,3,3] 2417; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 2418; CHECK-SSE41-NEXT: psrld $2, %xmm2 2419; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7] 2420; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2421; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2422; CHECK-SSE41-NEXT: pmuludq %xmm1, %xmm3 2423; CHECK-SSE41-NEXT: psrld $31, %xmm3 2424; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7] 2425; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5],xmm0[6,7] 2426; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm3 2427; CHECK-SSE41-NEXT: psubd %xmm3, %xmm0 2428; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 2429; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 2430; CHECK-SSE41-NEXT: psrld $31, %xmm0 2431; CHECK-SSE41-NEXT: retq 2432; 2433; CHECK-AVX1-LABEL: test_urem_odd_allones_and_poweroftwo_and_one: 2434; CHECK-AVX1: # %bb.0: 2435; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,0] 2436; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm2 2437; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[3,3,3,3] 2438; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 2439; CHECK-AVX1-NEXT: vpsrld $2, %xmm2, %xmm2 2440; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7] 2441; CHECK-AVX1-NEXT: 
vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2442; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2443; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 2444; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1 2445; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] 2446; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm0[6,7] 2447; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2448; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2449; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2450; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2451; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 2452; CHECK-AVX1-NEXT: retq 2453; 2454; CHECK-AVX2-LABEL: test_urem_odd_allones_and_poweroftwo_and_one: 2455; CHECK-AVX2: # %bb.0: 2456; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,0] 2457; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2458; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2459; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 2460; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 2461; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2462; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 2463; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 2464; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] 2465; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2466; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2467; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 2468; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2469; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 2470; CHECK-AVX2-NEXT: retq 2471; 2472; CHECK-AVX512VL-LABEL: test_urem_odd_allones_and_poweroftwo_and_one: 2473; CHECK-AVX512VL: # %bb.0: 2474; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2475; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 2476; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 2477; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2478; CHECK-AVX512VL-NEXT: vpsrld 
$31, %xmm0, %xmm0 2479; CHECK-AVX512VL-NEXT: retq 2480 %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 1> 2481 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 2482 %ret = zext <4 x i1> %cmp to <4 x i32> 2483 ret <4 x i32> %ret 2484} 2485 2486define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind { 2487; CHECK-SSE2-LABEL: test_urem_even_allones_and_poweroftwo_and_one: 2488; CHECK-SSE2: # %bb.0: 2489; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2147483649,268435456,0] 2490; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2491; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2492; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm3 2493; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3] 2494; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm4 2495; CHECK-SSE2-NEXT: psrld $1, %xmm4 2496; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,1],xmm0[2,3] 2497; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm4 2498; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,3,2,3] 2499; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 2500; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm2 2501; CHECK-SSE2-NEXT: psrld $2, %xmm2 2502; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3] 2503; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [14,4294967295,16,1] 2504; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 2505; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] 2506; CHECK-SSE2-NEXT: psrld $31, %xmm3 2507; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[3,3] 2508; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2509; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm1 2510; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] 2511; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 2512; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0 2513; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 2514; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 2515; CHECK-SSE2-NEXT: psrld $31, %xmm0 2516; CHECK-SSE2-NEXT: retq 2517; 2518; CHECK-SSE41-LABEL: 
test_urem_even_allones_and_poweroftwo_and_one: 2519; CHECK-SSE41: # %bb.0: 2520; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1 2521; CHECK-SSE41-NEXT: psrld $1, %xmm1 2522; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5,6,7] 2523; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,2147483649,268435456,0] 2524; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1 2525; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] 2526; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2527; CHECK-SSE41-NEXT: psrld $2, %xmm1 2528; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7] 2529; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 2530; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2531; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm3 2532; CHECK-SSE41-NEXT: psrld $31, %xmm3 2533; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] 2534; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5],xmm0[6,7] 2535; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm3 2536; CHECK-SSE41-NEXT: psubd %xmm3, %xmm0 2537; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 2538; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 2539; CHECK-SSE41-NEXT: psrld $31, %xmm0 2540; CHECK-SSE41-NEXT: retq 2541; 2542; CHECK-AVX1-LABEL: test_urem_even_allones_and_poweroftwo_and_one: 2543; CHECK-AVX1: # %bb.0: 2544; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1 2545; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3,4,5,6,7] 2546; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2454267027,2147483649,268435456,0] 2547; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 2548; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[3,3,3,3] 2549; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2550; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1 2551; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7] 2552; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] 2553; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] 2554; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm3, 
%xmm2 2555; CHECK-AVX1-NEXT: vpsrld $31, %xmm2, %xmm2 2556; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 2557; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm0[6,7] 2558; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2559; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2560; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2561; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2562; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 2563; CHECK-AVX1-NEXT: retq 2564; 2565; CHECK-AVX2-LABEL: test_urem_even_allones_and_poweroftwo_and_one: 2566; CHECK-AVX2: # %bb.0: 2567; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,2147483649,268435456,0] 2568; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] 2569; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm3 2570; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] 2571; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm4, %xmm2 2572; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 2573; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 2574; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 2575; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 2576; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] 2577; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 2578; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 2579; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 2580; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2581; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 2582; CHECK-AVX2-NEXT: retq 2583; 2584; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_poweroftwo_and_one: 2585; CHECK-AVX512VL: # %bb.0: 2586; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 2587; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 2588; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 2589; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 2590; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 2591; CHECK-AVX512VL-NEXT: retq 2592 %urem = urem <4 x i32> %X, 
<i32 14, i32 4294967295, i32 16, i32 1> 2593 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0> 2594 %ret = zext <4 x i1> %cmp to <4 x i32> 2595 ret <4 x i32> %ret 2596} 2597