; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=CHECK-SSE,CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=CHECK-SSE,CHECK-SSE41
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX512VL

; Odd divisor
define <4 x i32> @test_urem_odd_25(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_25:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_25:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [171798691,171798691,171798691,171798691]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_odd_25:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_odd_25:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
; CHECK-AVX2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [171798691,171798691,171798691,171798691]
; CHECK-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_25:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; Even divisors
define <4 x i32> @test_urem_even_100(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_100:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psrld $5, %xmm2
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [100,100,100,100]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_even_100:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm1
; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    psrld $5, %xmm2
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_even_100:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_even_100:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX2-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [100,100,100,100]
; CHECK-AVX2-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_100:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprord $2, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; Negative divisors should be negated, and thus this is still splat vectors.

; Odd divisor
define <4 x i32> @test_urem_odd_neg25(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_neg25:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3264175145,1030792151,1030792151,3264175145]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_neg25:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm0
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [171798691,1,1,171798691]
; CHECK-SSE41-NEXT:    pminud %xmm0, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX-LABEL: test_urem_odd_neg25:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 25, i32 -25, i32 -25, i32 25>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; Even divisors
define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_neg100:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    psrld $5, %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT:    psrld $2, %xmm2
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT:    psrld $27, %xmm2
; CHECK-SSE2-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_even_neg100:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm1
; CHECK-SSE41-NEXT:    psrld $5, %xmm1
; CHECK-SSE41-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE41-NEXT:    psrld $2, %xmm2
; CHECK-SSE41-NEXT:    pmuludq {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    psrld $27, %xmm2
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_even_neg100:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsrld $2, %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpmuludq {{.*}}(%rip), %xmm2, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpsrld $27, %xmm2, %xmm2
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_even_neg100:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX2-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [536870925,536870925,536870925,536870925]
; CHECK-AVX2-NEXT:    vpmuludq %xmm3, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_neg100:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vprord $2, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

;------------------------------------------------------------------------------;
; Comparison constant has undef elements.
;------------------------------------------------------------------------------;

define <4 x i32> @test_urem_odd_undef1(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_undef1:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psrld $3, %xmm2
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [25,25,25,25]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_odd_undef1:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm1
; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    psrld $3, %xmm2
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_odd_undef1:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpsrld $3, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_odd_undef1:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX2-NEXT:    vpsrld $3, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [25,25,25,25]
; CHECK-AVX2-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_undef1:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX512VL-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX512VL-NEXT:    vpsrld $3, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 undef, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

define <4 x i32> @test_urem_even_undef1(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_even_undef1:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psrld $5, %xmm2
; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [100,100,100,100]
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT:    psrld $31, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_even_undef1:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm1
; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT:    psrld $5, %xmm2
; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_even_undef1:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_even_undef1:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX2-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [100,100,100,100]
; CHECK-AVX2-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_undef1:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX512VL-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX512VL-NEXT:    vpsrld $5, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 undef, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

;------------------------------------------------------------------------------;
; Negative tests
;------------------------------------------------------------------------------;

define <4 x i32> @test_urem_one_eq(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_urem_one_eq:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_one_eq:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_one_eq:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_one_eq:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
define <4 x i32> @test_urem_one_ne(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_urem_one_ne:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    xorps %xmm0, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX-LABEL: test_urem_one_ne:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-AVX-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; We can lower remainder of division by powers of two much better elsewhere.
define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_urem_pow2:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; CHECK-SSE-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE-NEXT:    psrld $31, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_pow2:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_pow2:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
; CHECK-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_pow2:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 16, i32 16, i32 16, i32 16>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; We could lower remainder of division by INT_MIN much better elsewhere.
define <4 x i32> @test_urem_int_min(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_urem_int_min:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; CHECK-SSE-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE-NEXT:    psrld $31, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_int_min:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_int_min:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
; CHECK-AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_int_min:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; We could lower remainder of division by all-ones much better elsewhere.
define <4 x i32> @test_urem_allones(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_allones:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT:    psubd %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pxor {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pcmpgtd {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    pandn {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-SSE41-LABEL: test_urem_allones:
; CHECK-SSE41:       # %bb.0:
; CHECK-SSE41-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT:    psubd %xmm0, %xmm1
; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-SSE41-NEXT:    pminud %xmm1, %xmm0
; CHECK-SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT:    psrld $31, %xmm0
; CHECK-SSE41-NEXT:    retq
;
; CHECK-AVX1-LABEL: test_urem_allones:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX1-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_urem_allones:
; CHECK-AVX2:       # %bb.0:
; CHECK-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; CHECK-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm1
; CHECK-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512VL-LABEL: test_urem_allones:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX512VL-NEXT:    vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT:    retq
  %urem = urem <4 x i32> %X, <i32 4294967295, i32 4294967295, i32 4294967295, i32 4294967295>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}