; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

; The two sexts guarantee at least 33 sign bits in each i64 element, so the
; v2i64 sitofp should narrow to a v2i32 vcvtdq2pd.
define <2 x double> @signbits_sext_v2i64_sitofp_v2f64(i32 %a0, i32 %a1) nounwind {
; X32-LABEL: signbits_sext_v2i64_sitofp_v2f64:
; X32:       # %bb.0:
; X32-NEXT:    vcvtdq2pd {{[0-9]+}}(%esp), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: signbits_sext_v2i64_sitofp_v2f64:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    vpinsrd $1, %esi, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = sext i32 %a0 to i64
  %2 = sext i32 %a1 to i64
  %3 = insertelement <2 x i64> undef, i64 %1, i32 0
  %4 = insertelement <2 x i64> %3, i64 %2, i32 1
  %5 = sitofp <2 x i64> %4 to <2 x double>
  ret <2 x double> %5
}

; Every element is a sign extension, so only the low 32 bits of each i64 are
; significant and the v4i64 sitofp should use vcvtdq2ps.
define <4 x float> @signbits_sext_v4i64_sitofp_v4f32(i8 signext %a0, i16 signext %a1, i32 %a2, i32 %a3) nounwind {
; X32-LABEL: signbits_sext_v4i64_sitofp_v4f32:
; X32:       # %bb.0:
; X32-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovd %ecx, %xmm0
; X32-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: signbits_sext_v4i64_sitofp_v4f32:
; X64:       # %bb.0:
; X64-NEXT:    movslq %edi, %rax
; X64-NEXT:    movslq %esi, %rsi
; X64-NEXT:    movslq %edx, %rdx
; X64-NEXT:    movslq %ecx, %rcx
; X64-NEXT:    vmovq %rcx, %xmm0
; X64-NEXT:    vmovq %rdx, %xmm1
; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-NEXT:    vmovq %rsi, %xmm1
; X64-NEXT:    vmovq %rax, %xmm2
; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[0,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = sext i8 %a0 to i64
  %2 = sext i16 %a1 to i64
  %3 = sext i32 %a2 to i64
  %4 = sext i32 %a3 to i64
  %5 = insertelement <4 x i64> undef, i64 %1, i32 0
  %6 = insertelement <4 x i64> %5, i64 %2, i32 1
  %7 = insertelement <4 x i64> %6, i64 %3, i32 2
  %8 = insertelement <4 x i64> %7, i64 %4, i32 3
  %9 = sitofp <4 x i64> %8 to <4 x float>
  ret <4 x float> %9
}

; The ashr by 32 leaves at least 33 sign bits, so the extracted i64 fits in
; an i32 and a 32-bit scalar convert suffices.
define float @signbits_ashr_extract_sitofp_0(<2 x i64> %a0) nounwind {
; X32-LABEL: signbits_ashr_extract_sitofp_0:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    vextractps $1, %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_extract_sitofp_0:
; X64:       # %bb.0:
; X64-NEXT:    vpsrad $31, %xmm0, %xmm1
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 32, i64 32>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = sitofp i64 %2 to float
  ret float %3
}

; As above, but with non-uniform shift amounts; the smaller shift (32) still
; guarantees 33 sign bits in the extracted element.
define float @signbits_ashr_extract_sitofp_1(<2 x i64> %a0) nounwind {
; X32-LABEL: signbits_ashr_extract_sitofp_1:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
; X32-NEXT:    vpsrlq $63, %xmm1, %xmm2
; X32-NEXT:    vpsrlq $32, %xmm1, %xmm1
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT:    vpsrlq $63, %xmm0, %xmm2
; X32-NEXT:    vpsrlq $32, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X32-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm3, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_extract_sitofp_1:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $63, %xmm0, %xmm1
; X64-NEXT:    vpsrlq $32, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X64-NEXT:    vmovdqa {{.*#+}} xmm1 = [2147483648,1]
; X64-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 32, i64 63>
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = sitofp i64 %2 to float
  ret float %3
}

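; The ashr leaves at least 61 sign bits and the shl by 20 still preserves
; over 40, so the extracted element fits comfortably in an i32.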
define float @signbits_ashr_shl_extract_sitofp(<2 x i64> %a0) nounwind {
; X32-LABEL: signbits_ashr_shl_extract_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
; X32-NEXT:    vpsrlq $60, %xmm1, %xmm2
; X32-NEXT:    vpsrlq $61, %xmm1, %xmm1
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT:    vpsrlq $60, %xmm0, %xmm2
; X32-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X32-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X32-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; X32-NEXT:    vpsllq $20, %xmm0, %xmm0
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm3, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_shl_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $60, %xmm0, %xmm1
; X64-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; X64-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,8]
; X64-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; X64-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
; X64-NEXT:    vpsllq $20, %xmm0, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = shl <2 x i64> %1, <i64 20, i64 16>
  %3 = extractelement <2 x i64> %2, i32 0
  %4 = sitofp i64 %3 to float
  ret float %4
}

; The scalar ashr by 30 followed by the vector ashr by 3 leaves at least 34
; sign bits in element 0, so the sitofp only needs the low 32 bits.
define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwind {
; X32-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shrdl $30, %ecx, %eax
; X32-NEXT:    sarl $30, %ecx
; X32-NEXT:    vmovd %eax, %xmm0
; X32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT:    vpsrlq $3, %xmm0, %xmm0
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    sarq $30, %rdi
; X64-NEXT:    vmovq %rsi, %xmm0
; X64-NEXT:    vmovq %rdi, %xmm1
; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-NEXT:    vpsrad $3, %xmm0, %xmm1
; X64-NEXT:    vpsrlq $3, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
; X64-NEXT:    retq
  %1 = ashr i64 %a0, 30
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 %a1, i32 1
  %4 = ashr <2 x i64> %3, <i64 3, i64 3>
  %5 = extractelement <2 x i64> %4, i32 0
  %6 = sitofp i64 %5 to float
  ret float %6
}

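; Sign bits survive a shuffle of sign-extended elements, so the v4i64 sitofp
; should narrow to a v4i32 vcvtdq2pd.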
define <4 x double> @signbits_sext_shuffle_sitofp(<4 x i32> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: signbits_sext_shuffle_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpmovsxdq %xmm0, %xmm1
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT:    vpmovsxdq %xmm0, %xmm0
; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X32-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X32-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: signbits_sext_shuffle_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxdq %xmm0, %xmm1
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT:    vpmovsxdq %xmm0, %xmm0
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; X64-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = sext <4 x i32> %a0 to <4 x i64>
  %2 = shufflevector <4 x i64> %1, <4 x i64> %a1, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %3 = sitofp <4 x i64> %2 to <4 x double>
  ret <4 x double> %3
}

; Two ashrs of 16 combine into a 32-bit arithmetic shift that is tracked
; through the concat shuffles, leaving 33 sign bits per element.
define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    vpsrad $16, %xmm0, %xmm1
; X32-NEXT:    vpsrlq $16, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X32-NEXT:    vpsrlq $16, %xmm0, %xmm0
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X32-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrad $16, %xmm0, %xmm1
; X64-NEXT:    vpsrlq $16, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X64-NEXT:    vpsrlq $16, %xmm0, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vcvtdq2pd %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 16, i64 16>
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %3 = shufflevector <4 x i64> %a1, <4 x i64> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %4 = ashr <4 x i64> %3, <i64 16, i64 16, i64 16, i64 16>
  %5 = shufflevector <4 x i64> %4, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  %6 = sitofp <2 x i64> %5 to <2 x double>
  ret <2 x double> %6
}

; 'and' propagates the minimum sign-bit count of its operands (33 here, from
; the sign-extended lane), so element 0 still fits in an i32.
define float @signbits_ashr_sext_sextinreg_and_extract_sitofp(<2 x i64> %a0, <2 x i64> %a1, i32 %a2) nounwind {
; X32-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X32-NEXT:    vpsrlq $60, %xmm2, %xmm3
; X32-NEXT:    vpsrlq $61, %xmm2, %xmm2
; X32-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
; X32-NEXT:    vpsrlq $60, %xmm0, %xmm3
; X32-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; X32-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X32-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X32-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm1
; X32-NEXT:    sarl $31, %eax
; X32-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; X32-NEXT:    vpsllq $20, %xmm1, %xmm1
; X32-NEXT:    vpsrad $20, %xmm1, %xmm2
; X32-NEXT:    vpsrlq $20, %xmm1, %xmm1
; X32-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; X32-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm4, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $60, %xmm0, %xmm2
; X64-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X64-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,8]
; X64-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X64-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X64-NEXT:    movslq %edi, %rax
; X64-NEXT:    vpinsrq $0, %rax, %xmm1, %xmm1
; X64-NEXT:    vpsllq $20, %xmm1, %xmm1
; X64-NEXT:    vpsrad $20, %xmm1, %xmm2
; X64-NEXT:    vpsrlq $20, %xmm1, %xmm1
; X64-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm3, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = sext i32 %a2 to i64
  %3 = insertelement <2 x i64> %a1, i64 %2, i32 0
  %4 = shl <2 x i64> %3, <i64 20, i64 20>
  %5 = ashr <2 x i64> %4, <i64 20, i64 20>
  %6 = and <2 x i64> %1, %5
  %7 = extractelement <2 x i64> %6, i32 0
  %8 = sitofp i64 %7 to float
  ret float %8
}

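; Known sign bits propagate through and/or/xor as the minimum of the inputs,
; so the bitop chain keeps at least 33 sign bits in element 0.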
define float @signbits_ashr_sextvecinreg_bitops_extract_sitofp(<2 x i64> %a0, <4 x i32> %a1) nounwind {
; X32-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %eax
; X32-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
; X32-NEXT:    vpsrlq $60, %xmm2, %xmm3
; X32-NEXT:    vpsrlq $61, %xmm2, %xmm2
; X32-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
; X32-NEXT:    vpsrlq $60, %xmm0, %xmm3
; X32-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; X32-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X32-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X32-NEXT:    vpmovsxdq %xmm1, %xmm1
; X32-NEXT:    vpand %xmm1, %xmm0, %xmm2
; X32-NEXT:    vpor %xmm1, %xmm2, %xmm1
; X32-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; X32-NEXT:    vmovd %xmm0, %eax
; X32-NEXT:    vcvtsi2ssl %eax, %xmm4, %xmm0
; X32-NEXT:    vmovss %xmm0, (%esp)
; X32-NEXT:    flds (%esp)
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $60, %xmm0, %xmm2
; X64-NEXT:    vpsrlq $61, %xmm0, %xmm0
; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; X64-NEXT:    vmovdqa {{.*#+}} xmm2 = [4,8]
; X64-NEXT:    vpxor %xmm2, %xmm0, %xmm0
; X64-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm1, %xmm1
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm2
; X64-NEXT:    vpor %xmm1, %xmm2, %xmm1
; X64-NEXT:    vpxor %xmm0, %xmm1, %xmm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vcvtsi2ssl %eax, %xmm3, %xmm0
; X64-NEXT:    retq
  %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = sext <2 x i32> %2 to <2 x i64>
  %4 = and <2 x i64> %1, %3
  %5 = or <2 x i64> %4, %3
  %6 = xor <2 x i64> %5, %1
  %7 = extractelement <2 x i64> %6, i32 0
  %8 = sitofp i64 %7 to float
  ret float %8
}

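; Both select operands carry at least 33 sign bits, which survive the select
; and the shuffle, so the v4i64 sitofp should lower to vcvtdq2ps.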
define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2, <4 x i32> %a3) nounwind {
; X32-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    vmovdqa {{.*#+}} xmm3 = [33,0,63,0]
; X32-NEXT:    vmovdqa {{.*#+}} xmm4 = [0,2147483648,0,2147483648]
; X32-NEXT:    vpsrlq %xmm3, %xmm4, %xmm5
; X32-NEXT:    vpshufd {{.*#+}} xmm6 = xmm3[2,3,0,1]
; X32-NEXT:    vpsrlq %xmm6, %xmm4, %xmm4
; X32-NEXT:    vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
; X32-NEXT:    vextractf128 $1, %ymm2, %xmm5
; X32-NEXT:    vpsrlq %xmm6, %xmm5, %xmm7
; X32-NEXT:    vpsrlq %xmm3, %xmm5, %xmm5
; X32-NEXT:    vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm7[4,5,6,7]
; X32-NEXT:    vpsrlq %xmm6, %xmm2, %xmm6
; X32-NEXT:    vpsrlq %xmm3, %xmm2, %xmm2
; X32-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
; X32-NEXT:    vpmovsxdq 16(%ebp), %xmm3
; X32-NEXT:    vpxor %xmm4, %xmm5, %xmm5
; X32-NEXT:    vpsubq %xmm4, %xmm5, %xmm5
; X32-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; X32-NEXT:    vpsubq %xmm4, %xmm2, %xmm2
; X32-NEXT:    vpmovsxdq 8(%ebp), %xmm4
; X32-NEXT:    vinsertf128 $1, %xmm5, %ymm2, %ymm2
; X32-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
; X32-NEXT:    vextractf128 $1, %ymm1, %xmm4
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm5
; X32-NEXT:    vpcmpeqq %xmm4, %xmm5, %xmm4
; X32-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; X32-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; X32-NEXT:    vblendvpd %ymm0, %ymm2, %ymm3, %ymm0
; X32-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X32-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X64:       # %bb.0:
; X64-NEXT:    vextractf128 $1, %ymm2, %xmm4
; X64-NEXT:    vpsrlq $63, %xmm4, %xmm5
; X64-NEXT:    vpsrlq $33, %xmm4, %xmm4
; X64-NEXT:    vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm5[4,5,6,7]
; X64-NEXT:    vmovdqa {{.*#+}} xmm5 = [1073741824,1]
; X64-NEXT:    vpxor %xmm5, %xmm4, %xmm4
; X64-NEXT:    vpsubq %xmm5, %xmm4, %xmm4
; X64-NEXT:    vpsrlq $63, %xmm2, %xmm6
; X64-NEXT:    vpsrlq $33, %xmm2, %xmm2
; X64-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
; X64-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; X64-NEXT:    vpsubq %xmm5, %xmm2, %xmm2
; X64-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; X64-NEXT:    vpmovsxdq %xmm3, %xmm4
; X64-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; X64-NEXT:    vpmovsxdq %xmm3, %xmm3
; X64-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
; X64-NEXT:    vextractf128 $1, %ymm1, %xmm4
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm5
; X64-NEXT:    vpcmpeqq %xmm4, %xmm5, %xmm4
; X64-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; X64-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; X64-NEXT:    vblendvpd %ymm0, %ymm2, %ymm3, %ymm0
; X64-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %1 = ashr <4 x i64> %a2, <i64 33, i64 63, i64 33, i64 63>
  %2 = sext <4 x i32> %a3 to <4 x i64>
  %3 = icmp eq <4 x i64> %a0, %a1
  %4 = select <4 x i1> %3, <4 x i64> %1, <4 x i64> %2
  %5 = shufflevector <4 x i64> %4, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %6 = sitofp <4 x i64> %5 to <4 x float>
  ret <4 x float> %6
}