; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpextrw $0, %xmm0, %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpextrw $0, %xmm0, %eax
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %2 = extractelement <8 x i16> %1, i32 0
  %3 = sext i16 %2 to i32
  ret i32 %3
}

define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_extract_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_extract_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = and <2 x i64> %a0, <i64 65535, i64 -1>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = uitofp i64 %2 to float
  ret float %3
}

define <4 x float> @knownbits_insert_uitofp(<4 x i32> %a0, i16 %a1, i16 %a2) nounwind {
; X32-LABEL: knownbits_insert_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_insert_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    movzwl %si, %ecx
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = zext i16 %a1 to i32
  %2 = zext i16 %a2 to i32
  %3 = insertelement <4 x i32> %a0, i32 %1, i32 0
  %4 = insertelement <4 x i32> %3, i32 %2, i32 2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

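; The next three tests mask elements 4-7 into the range [0,15] before the
; shuffle selects them, so the i16 sign bit is known zero and the sext can be
; lowered as the cheaper zero-extension (vpmovzxwd). The undef variant still
; emits a true sign-extension (vpmovsxwd): the undef lane in its first
; shuffle defeats the known-bits argument.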
define <4 x i32> @knownbits_mask_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %3 = sext <4 x i16> %2 to <4 x i32>
  ret <4 x i32> %3
}

define <4 x i32> @knownbits_mask_shuffle_shuffle_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_shuffle_shuffle_undef_sext(<8 x i16> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT:    vpmovsxwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_shuffle_undef_sext:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT:    vpmovsxwd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}

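; In the two tests below the mask keeps lanes 2 and 3 within 12 bits, and the
; or/xor against 65535 only touches the low 16 bits, so bits 16-31 stay known
; zero. With the sign bit known zero, uitofp is equivalent to sitofp and the
; lowering ends in vcvtdq2ps rather than the general unsigned expansion.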
define <4 x float> @knownbits_mask_or_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_or_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpor {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
; X32-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_or_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpor {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
; X64-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

define <4 x float> @knownbits_mask_xor_shuffle_uitofp(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vxorps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_xor_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vxorps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
  %2 = xor <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

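; Each of the following six tests folds to an all-zeros result (a bare
; vxorps). Worked example for the shl case: the and clears bits 0-15 of lanes
; 0 and 3, shl 17 keeps only source bits 0-14 (all known zero), so every bit
; of the shuffled lanes is already zero before the final lshr. The ashr, mul,
; trunc, add and sub variants bound the possibly-set bits the same way, and
; the final shift moves all of them out of the register.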
define <4 x i32> @knownbits_mask_shl_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_shl_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_shl_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = shl <4 x i32> %1, <i32 17, i32 17, i32 17, i32 17>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 15, i32 15, i32 15, i32 15>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_ashr_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_ashr_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 -1, i32 131071>
  %2 = ashr <4 x i32> %1, <i32 15, i32 15, i32 15, i32 15>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 30, i32 30, i32 30, i32 30>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_mul_shuffle_shl(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_mul_shuffle_shl:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_mul_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
  %2 = mul <4 x i32> %a1, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_trunc_shuffle_shl(<4 x i64> %a0) nounwind {
; X32-LABEL: knownbits_mask_trunc_shuffle_shl:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_trunc_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i64> %a0, <i64 -65536, i64 -7, i64 7, i64 -65536>
  %2 = trunc <4 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_add_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_add_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_add_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = add <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %5
}

define <4 x i32> @knownbits_mask_sub_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_sub_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_sub_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 15, i32 -1, i32 -1, i32 15>
  %2 = sub <4 x i32> <i32 255, i32 255, i32 255, i32 255>, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

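; More folds to vxorps: a udiv quotient never exceeds its masked dividend, a
; urem result is smaller than its divisor (at most 15, or below 2^15 in the
; masked variant), and in the srem test the masked lanes are multiples of
; 2^15 and hence of 16, so the remainder is known zero. bswap moves the
; known-zero high bytes into the low half. In each case the trailing shift
; discards every bit that could still be set. The concat and lshr/bitcast
; tests after them return to the uitofp pattern: the selected lanes keep a
; known-zero sign bit, so vcvtdq2ps suffices.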
define <4 x i32> @knownbits_mask_udiv_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_udiv_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = udiv <4 x i32> %1, %a1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_urem_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_urem_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_urem_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = urem <4 x i32> %a0, <i32 16, i32 16, i32 16, i32 16>
  %2 = lshr <4 x i32> %1, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %2
}

define <4 x i32> @knownbits_mask_urem_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_urem_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_urem_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = and <4 x i32> %a1, <i32 32767, i32 -1, i32 -1, i32 32767>
  %3 = urem <4 x i32> %1, %2
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %5 = lshr <4 x i32> %4, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %5
}

define <4 x i32> @knownbits_mask_srem_shuffle_lshr(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_srem_shuffle_lshr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_srem_shuffle_lshr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -32768, i32 -1, i32 -1, i32 -32768>
  %2 = srem <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = lshr <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}

define <4 x i32> @knownbits_mask_bswap_shuffle_shl(<4 x i32> %a0) nounwind {
; X32-LABEL: knownbits_mask_bswap_shuffle_shl:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_bswap_shuffle_shl:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 32767, i32 -1, i32 -1, i32 32767>
  %2 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %1)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = shl <4 x i32> %3, <i32 22, i32 22, i32 22, i32 22>
  ret <4 x i32> %4
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)

define <8 x float> @knownbits_mask_concat_uitofp(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_mask_concat_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm1, %xmm1
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X32-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_concat_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vandps {{.*}}(%rip), %xmm1, %xmm1
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[1,3,1,3]
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT:    vcvtdq2ps %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 131071, i32 -1, i32 131071, i32 -1>
  %2 = and <4 x i32> %a1, <i32 -1, i32 131071, i32 -1, i32 131071>
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 2, i32 0, i32 2, i32 5, i32 7, i32 5, i32 7>
  %4 = uitofp <8 x i32> %3 to <8 x float>
  ret <8 x float> %4
}

define <4 x float> @knownbits_lshr_bitcast_shuffle_uitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_lshr_bitcast_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $1, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = lshr <2 x i64> %a0, <i64 1, i64 1>
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

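; The smax/smin pair below clamps the range, but no known-zero sign bit is
; established through the target intrinsics, so the generic two-stage
; unsigned-to-float expansion (blend/shift/add against magic constants) is
; still emitted. By contrast, unsigned min against a constant gives a direct
; upper bound, and the umin test folds its uitofp to a plain vcvtdq2ps.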
define <4 x float> @knownbits_smax_smin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpminsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpmaxsd {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vpsrld $16, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_smax_smin_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpminsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmaxsd {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> <i32 0, i32 -65535, i32 -65535, i32 0>)
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 131071>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x float> @knownbits_umin_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_umin_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpminud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_umin_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpminud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %3 = uitofp <4 x i32> %2 to <4 x float>
  ret <4 x float> %3
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @knownbits_umax_shuffle_ashr(<4 x i32> %a0) {
; X32-LABEL: knownbits_umax_shuffle_ashr:
; X32:       # %bb.0:
; X32-NEXT:    vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2]
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_umax_shuffle_ashr:
; X64:       # %bb.0:
; X64-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,2]
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> <i32 65535, i32 -1, i32 -1, i32 262143>)
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
  %3 = ashr <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %3
}

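; In the umax/ashr test above, the unsigned max against -1 forces lanes 1 and
; 2 to all-ones, so the ashr by 31 is a no-op and is dropped. In the test
; below, the mask and the umax keep lanes 0 and 3 within 18 bits, the sign
; bit stays known zero, and the uitofp again lowers to vcvtdq2ps.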
define <4 x float> @knownbits_mask_umax_shuffle_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpmaxud {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_umax_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmaxud {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 65535, i32 -1, i32 -1, i32 262143>
  %2 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %1, <4 x i32> <i32 255, i32 -1, i32 -1, i32 1023>)
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

define <4 x i32> @knownbits_mask_bitreverse_ashr(<4 x i32> %a0) {
; X32-LABEL: knownbits_mask_bitreverse_ashr:
; X32:       # %bb.0:
; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_mask_bitreverse_ashr:
; X64:       # %bb.0:
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a0, <i32 -2, i32 -2, i32 -2, i32 -2>
  %2 = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %1)
  %3 = ashr <4 x i32> %2, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) nounwind readnone

; If we don't know that the input isn't INT_MIN we can't combine to sitofp
define <4 x float> @knownbits_abs_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_abs_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpabsd %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vpsrld $16, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X32-NEXT:    vaddps {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_abs_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpabsd %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; X64-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = sub <4 x i32> zeroinitializer, %a0
  %2 = icmp slt <4 x i32> %a0, zeroinitializer
  %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> %a0
  %4 = uitofp <4 x i32> %3 to <4 x float>
  ret <4 x float> %4
}

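; Setting the low bit via the or rules out INT_MIN in the lanes the shuffle
; selects, so the absolute value is known non-negative and the conversion can
; combine to sitofp: vpabsd followed by a plain vcvtdq2ps.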
define <4 x float> @knownbits_or_abs_uitofp(<4 x i32> %a0) {
; X32-LABEL: knownbits_or_abs_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpor {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X32-NEXT:    vpabsd %xmm0, %xmm0
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_or_abs_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpor {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; X64-NEXT:    vpabsd %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = or <4 x i32> %a0, <i32 1, i32 0, i32 3, i32 0>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
  %3 = sub <4 x i32> zeroinitializer, %2
  %4 = icmp slt <4 x i32> %2, zeroinitializer
  %5 = select <4 x i1> %4, <4 x i32> %3, <4 x i32> %2
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

define <4 x float> @knownbits_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) nounwind {
; X32-LABEL: knownbits_and_select_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    vmovaps 8(%ebp), %xmm3
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm2, %xmm2
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm3, %xmm3
; X32-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X32-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_and_select_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vandps {{.*}}(%rip), %xmm2, %xmm2
; X64-NEXT:    vandps {{.*}}(%rip), %xmm3, %xmm3
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = and <4 x i32> %a2, <i32 65535, i32 -1, i32 255, i32 -1>
  %2 = and <4 x i32> %a3, <i32 255, i32 -1, i32 65535, i32 -1>
  %3 = icmp eq <4 x i32> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}

define <4 x float> @knownbits_lshr_and_select_shuffle_uitofp(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) nounwind {
; X32-LABEL: knownbits_lshr_and_select_shuffle_uitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    vmovaps 8(%ebp), %xmm3
; X32-NEXT:    vpsrld $5, %xmm2, %xmm2
; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm3, %xmm3
; X32-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X32-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: knownbits_lshr_and_select_shuffle_uitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrld $5, %xmm2, %xmm2
; X64-NEXT:    vandps {{.*}}(%rip), %xmm3, %xmm3
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; X64-NEXT:    vblendvps %xmm0, %xmm2, %xmm3, %xmm0
; X64-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = lshr <4 x i32> %a2, <i32 5, i32 1, i32 5, i32 1>
  %2 = and <4 x i32> %a3, <i32 255, i32 -1, i32 65535, i32 -1>
  %3 = icmp eq <4 x i32> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
  %5 = shufflevector <4 x i32> %4, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = uitofp <4 x i32> %5 to <4 x float>
  ret <4 x float> %6
}
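
; Note on the two select tests above: the known bits of a select are the
; intersection of what is known about its two arms, so although either arm
; may be chosen at run time, the shuffled lanes keep their high bits known
; zero and the uitofp still lowers to the signed vcvtdq2ps.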