; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQNOVL --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW

; Codegen tests for vector and scalar int<->fp conversions under the AVX-512
; feature combinations exercised by the RUN lines above. The CHECK blocks are
; machine-generated; do not hand-edit them -- rerun
; utils/update_llc_test_checks.py after any codegen change.

define <16 x float> @sitof32(<16 x i32> %a) nounwind {
; ALL-LABEL: sitof32:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT:    retq
  %b = sitofp <16 x i32> %a to <16 x float>
  ret <16 x float> %b
}

; Signed i64 -> fp: without AVX512DQ the conversion is scalarized through
; vpextrq/vmovq + vcvtsi2sdq/vcvtsi2ssq; with DQ it is a single vcvtqq2pd /
; vcvtqq2ps (widened to zmm when VL is unavailable).
define <8 x double> @sltof864(<8 x i64> %a) {
; NODQ-LABEL: sltof864:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm3
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm0
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: sltof864:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: sltof864:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvtqq2pd %zmm0, %zmm0
; DQNOVL-NEXT:    retq
  %b = sitofp <8 x i64> %a to <8 x double>
  ret <8 x double> %b
}

define <4 x double> @slto4f64(<4 x i64> %a) {
; NODQ-LABEL: slto4f64:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm1
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm2
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm0
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: slto4f64:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2pd %ymm0, %ymm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: slto4f64:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; DQNOVL-NEXT:    vcvtqq2pd %zmm0, %zmm0
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; DQNOVL-NEXT:    retq
  %b = sitofp <4 x i64> %a to <4 x double>
  ret <4 x double> %b
}

define <2 x double> @slto2f64(<2 x i64> %a) {
; NODQ-LABEL: slto2f64:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm1, %xmm1
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm0
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: slto2f64:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2pd %xmm0, %xmm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: slto2f64:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; DQNOVL-NEXT:    vcvtqq2pd %zmm0, %zmm0
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %b = sitofp <2 x i64> %a to <2 x double>
  ret <2 x double> %b
}

define <2 x float> @sltof2f32(<2 x i64> %a) {
; NODQ-LABEL: sltof2f32:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm1
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm0
; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm1
; NODQ-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: sltof2f32:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2ps %xmm0, %xmm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: sltof2f32:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; DQNOVL-NEXT:    vcvtqq2ps %zmm0, %ymm0
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %b = sitofp <2 x i64> %a to <2 x float>
  ret <2 x float>%b
}

define <4 x float> @slto4f32_mem(<4 x i64>* %a) {
; NODQ-LABEL: slto4f32_mem:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vmovdqu (%rdi), %ymm0
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm1
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm0
; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; NODQ-NEXT:    vzeroupper
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: slto4f32_mem:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2psy (%rdi), %xmm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: slto4f32_mem:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vmovups (%rdi), %ymm0
; DQNOVL-NEXT:    vcvtqq2ps %zmm0, %ymm0
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %a1 = load <4 x i64>, <4 x i64>* %a, align 8
  %b = sitofp <4 x i64> %a1 to <4 x float>
  ret <4 x float>%b
}

; fptosi fp -> i64: scalarized via vcvttsd2si/vcvttss2si without DQ, single
; vcvttpd2qq / vcvttps2qq with DQ.
define <4 x i64> @f64to4sl(<4 x double> %a) {
; NODQ-LABEL: f64to4sl:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vextractf128 $1, %ymm0, %xmm1
; NODQ-NEXT:    vcvttsd2si %xmm1, %rax
; NODQ-NEXT:    vmovq %rax, %xmm2
; NODQ-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; NODQ-NEXT:    vcvttsd2si %xmm1, %rax
; NODQ-NEXT:    vmovq %rax, %xmm1
; NODQ-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; NODQ-NEXT:    vcvttsd2si %xmm0, %rax
; NODQ-NEXT:    vmovq %rax, %xmm2
; NODQ-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; NODQ-NEXT:    vcvttsd2si %xmm0, %rax
; NODQ-NEXT:    vmovq %rax, %xmm0
; NODQ-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; NODQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: f64to4sl:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttpd2qq %ymm0, %ymm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: f64to4sl:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; DQNOVL-NEXT:    vcvttpd2qq %zmm0, %zmm0
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; DQNOVL-NEXT:    retq
  %b = fptosi <4 x double> %a to <4 x i64>
  ret <4 x i64> %b
}

define <4 x i64> @f32to4sl(<4 x float> %a) {
; NODQ-LABEL: f32to4sl:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; NODQ-NEXT:    vcvttss2si %xmm1, %rax
; NODQ-NEXT:    vmovq %rax, %xmm1
; NODQ-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; NODQ-NEXT:    vcvttss2si %xmm2, %rax
; NODQ-NEXT:    vmovq %rax, %xmm2
; NODQ-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; NODQ-NEXT:    vcvttss2si %xmm0, %rax
; NODQ-NEXT:    vmovq %rax, %xmm2
; NODQ-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; NODQ-NEXT:    vcvttss2si %xmm0, %rax
; NODQ-NEXT:    vmovq %rax, %xmm0
; NODQ-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; NODQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: f32to4sl:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvttps2qq %xmm0, %ymm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: f32to4sl:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; DQNOVL-NEXT:    vcvttps2qq %ymm0, %zmm0
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; DQNOVL-NEXT:    retq
  %b = fptosi <4 x float> %a to <4 x i64>
  ret <4 x i64> %b
}

define <4 x float> @slto4f32(<4 x i64> %a) {
; NODQ-LABEL: slto4f32:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm1
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm0
; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; NODQ-NEXT:    vzeroupper
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: slto4f32:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2ps %ymm0, %xmm0
; VLDQ-NEXT:    vzeroupper
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: slto4f32:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; DQNOVL-NEXT:    vcvtqq2ps %zmm0, %ymm0
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %b = sitofp <4 x i64> %a to <4 x float>
  ret <4 x float> %b
}

; Unsigned i64 -> fp variants: vcvtusi2ssq/vcvtusi2sdq scalarized, or
; vcvtuqq2ps/vcvtuqq2pd with DQ.
define <4 x float> @ulto4f32(<4 x i64> %a) {
; NODQ-LABEL: ulto4f32:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm1, %xmm1
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm2, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm3, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm3, %xmm0
; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; NODQ-NEXT:    vzeroupper
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: ulto4f32:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtuqq2ps %ymm0, %xmm0
; VLDQ-NEXT:    vzeroupper
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: ulto4f32:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; DQNOVL-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %b = uitofp <4 x i64> %a to <4 x float>
  ret <4 x float> %b
}

define <8 x double> @ulto8f64(<8 x i64> %a) {
; NODQ-LABEL: ulto8f64:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm2, %xmm2
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm1
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm2
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm2
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm3
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm0
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: ulto8f64:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: ulto8f64:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; DQNOVL-NEXT:    retq
  %b = uitofp <8 x i64> %a to <8 x double>
  ret <8 x double> %b
}

define <16 x double> @ulto16f64(<16 x i64> %a) {
; NODQ-LABEL: ulto16f64:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm2
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm4
; NODQ-NEXT:    vmovq %xmm3, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm3
; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vmovq %xmm3, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm0
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; NODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; NODQ-NEXT:    vextracti32x4 $3, %zmm1, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm2
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vmovq %xmm3, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; NODQ-NEXT:    vextracti128 $1, %ymm1, %xmm3
; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vmovq %xmm3, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm1
; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: ulto16f64:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; VLDQ-NEXT:    vcvtuqq2pd %zmm1, %zmm1
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: ulto16f64:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; DQNOVL-NEXT:    vcvtuqq2pd %zmm1, %zmm1
; DQNOVL-NEXT:    retq
  %b = uitofp <16 x i64> %a to <16 x double>
  ret <16 x double> %b
}

; NOTE(review): the name says f64 but the operand is <16 x float>; presumably
; historical -- confirm before renaming, since CHECK labels would need
; regeneration.
define <16 x i32> @f64to16si(<16 x float> %a) nounwind {
; ALL-LABEL: f64to16si:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
; ALL-NEXT:    retq
  %b = fptosi <16 x float> %a to <16 x i32>
  ret <16 x i32> %b
}

define <16 x i8> @f32to16sc(<16 x float> %f) {
; ALL-LABEL: f32to16sc:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
; ALL-NEXT:    vpmovdb %zmm0, %xmm0
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %res = fptosi <16 x float> %f to <16 x i8>
  ret <16 x i8> %res
}

define <16 x i16> @f32to16ss(<16 x float> %f) {
; ALL-LABEL: f32to16ss:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
; ALL-NEXT:    vpmovdw %zmm0, %ymm0
; ALL-NEXT:    retq
  %res = fptosi <16 x float> %f to <16 x i16>
  ret <16 x i16> %res
}

define <16 x i32> @f32to16ui(<16 x float> %a) nounwind {
; ALL-LABEL: f32to16ui:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttps2udq %zmm0, %zmm0
; ALL-NEXT:    retq
  %b = fptoui <16 x float> %a to <16 x i32>
  ret <16 x i32> %b
}

define <16 x i8> @f32to16uc(<16 x float> %f) {
; ALL-LABEL: f32to16uc:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
; ALL-NEXT:    vpmovdb %zmm0, %xmm0
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %res = fptoui <16 x float> %f to <16 x i8>
  ret <16 x i8> %res
}

define <16 x i16> @f32to16us(<16 x float> %f) {
; ALL-LABEL: f32to16us:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttps2dq %zmm0, %zmm0
; ALL-NEXT:    vpmovdw %zmm0, %ymm0
; ALL-NEXT:    retq
  %res = fptoui <16 x float> %f to <16 x i16>
  ret <16 x i16> %res
}

; fptoui to i32: without VL the narrow ops are widened to zmm (kill comments),
; with VL the native-width vcvttps2udq/vcvttpd2udq is used.
define <8 x i32> @f32to8ui(<8 x float> %a) nounwind {
; NOVL-LABEL: f32to8ui:
; NOVL:       # %bb.0:
; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NOVL-NEXT:    vcvttps2udq %zmm0, %zmm0
; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; NOVL-NEXT:    retq
;
; VL-LABEL: f32to8ui:
; VL:       # %bb.0:
; VL-NEXT:    vcvttps2udq %ymm0, %ymm0
; VL-NEXT:    retq
  %b = fptoui <8 x float> %a to <8 x i32>
  ret <8 x i32> %b
}

define <4 x i32> @f32to4ui(<4 x float> %a) nounwind {
; NOVL-LABEL: f32to4ui:
; NOVL:       # %bb.0:
; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NOVL-NEXT:    vcvttps2udq %zmm0, %zmm0
; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; NOVL-NEXT:    vzeroupper
; NOVL-NEXT:    retq
;
; VL-LABEL: f32to4ui:
; VL:       # %bb.0:
; VL-NEXT:    vcvttps2udq %xmm0, %xmm0
; VL-NEXT:    retq
  %b = fptoui <4 x float> %a to <4 x i32>
  ret <4 x i32> %b
}

define <8 x i32> @f64to8ui(<8 x double> %a) nounwind {
; ALL-LABEL: f64to8ui:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttpd2udq %zmm0, %ymm0
; ALL-NEXT:    retq
  %b = fptoui <8 x double> %a to <8 x i32>
  ret <8 x i32> %b
}

define <8 x i16> @f64to8us(<8 x double> %f) {
; NOVL-LABEL: f64to8us:
; NOVL:       # %bb.0:
; NOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; NOVL-NEXT:    vpmovdw %zmm0, %ymm0
; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; NOVL-NEXT:    vzeroupper
; NOVL-NEXT:    retq
;
; VL-LABEL: f64to8us:
; VL:       # %bb.0:
; VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; VL-NEXT:    vpmovdw %ymm0, %xmm0
; VL-NEXT:    vzeroupper
; VL-NEXT:    retq
  %res = fptoui <8 x double> %f to <8 x i16>
  ret <8 x i16> %res
}

define <8 x i8> @f64to8uc(<8 x double> %f) {
; NOVL-LABEL: f64to8uc:
; NOVL:       # %bb.0:
; NOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; NOVL-NEXT:    vpmovdw %zmm0, %ymm0
; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; NOVL-NEXT:    vzeroupper
; NOVL-NEXT:    retq
;
; VL-LABEL: f64to8uc:
; VL:       # %bb.0:
; VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; VL-NEXT:    vpmovdw %ymm0, %xmm0
; VL-NEXT:    vzeroupper
; VL-NEXT:    retq
  %res = fptoui <8 x double> %f to <8 x i8>
  ret <8 x i8> %res
}

define <4 x i32> @f64to4ui(<4 x double> %a) nounwind {
; NOVL-LABEL: f64to4ui:
; NOVL:       # %bb.0:
; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NOVL-NEXT:    vcvttpd2udq %zmm0, %ymm0
; NOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; NOVL-NEXT:    vzeroupper
; NOVL-NEXT:    retq
;
; VL-LABEL: f64to4ui:
; VL:       # %bb.0:
; VL-NEXT:    vcvttpd2udq %ymm0, %xmm0
; VL-NEXT:    vzeroupper
; VL-NEXT:    retq
  %b = fptoui <4 x double> %a to <4 x i32>
  ret <4 x i32> %b
}

define <8 x double> @sito8f64(<8 x i32> %a) {
; ALL-LABEL: sito8f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
; ALL-NEXT:    retq
  %b = sitofp <8 x i32> %a to <8 x double>
  ret <8 x double> %b
}
; Masked/maskz variants: the i8 mask arrives in %edi and is moved into a k
; register -- kmovd with AVX512BW, kmovw otherwise.
define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
; KNL-LABEL: i32to8f64_mask:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; VLBW-LABEL: i32to8f64_mask:
; VLBW:       # %bb.0:
; VLBW-NEXT:    kmovd %edi, %k1
; VLBW-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1}
; VLBW-NEXT:    retq
;
; VLNOBW-LABEL: i32to8f64_mask:
; VLNOBW:       # %bb.0:
; VLNOBW-NEXT:    kmovw %edi, %k1
; VLNOBW-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1}
; VLNOBW-NEXT:    retq
;
; DQNOVL-LABEL: i32to8f64_mask:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    kmovw %edi, %k1
; DQNOVL-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1}
; DQNOVL-NEXT:    retq
;
; AVX512BW-LABEL: i32to8f64_mask:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %1 = bitcast i8 %c to <8 x i1>
  %2 = sitofp <8 x i32> %b to <8 x double>
  %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
  ret <8 x double> %3
}
define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
; KNL-LABEL: sito8f64_maskz:
; KNL:       # %bb.0:
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; VLBW-LABEL: sito8f64_maskz:
; VLBW:       # %bb.0:
; VLBW-NEXT:    kmovd %edi, %k1
; VLBW-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
; VLBW-NEXT:    retq
;
; VLNOBW-LABEL: sito8f64_maskz:
; VLNOBW:       # %bb.0:
; VLNOBW-NEXT:    kmovw %edi, %k1
; VLNOBW-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
; VLNOBW-NEXT:    retq
;
; DQNOVL-LABEL: sito8f64_maskz:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    kmovw %edi, %k1
; DQNOVL-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    retq
;
; AVX512BW-LABEL: sito8f64_maskz:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
  %1 = bitcast i8 %b to <8 x i1>
  %2 = sitofp <8 x i32> %a to <8 x double>
  %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
  ret <8 x double> %3
}

define <8 x i32> @f64to8si(<8 x double> %a) {
; ALL-LABEL: f64to8si:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; ALL-NEXT:    retq
  %b = fptosi <8 x double> %a to <8 x i32>
  ret <8 x i32> %b
}

define <4 x i32> @f64to4si(<4 x double> %a) {
; ALL-LABEL: f64to4si:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvttpd2dq %ymm0, %xmm0
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %b = fptosi <4 x double> %a to <4 x i32>
  ret <4 x i32> %b
}

define <16 x float> @f64to16f32(<16 x double> %b) nounwind {
; ALL-LABEL: f64to16f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtpd2ps %zmm0, %ymm0
; ALL-NEXT:    vcvtpd2ps %zmm1, %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %a = fptrunc <16 x double> %b to <16 x float>
  ret <16 x float> %a
}

define <4 x float> @f64to4f32(<4 x double> %b) {
; ALL-LABEL: f64to4f32:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtpd2ps %ymm0, %xmm0
; ALL-NEXT:    vzeroupper
; ALL-NEXT:    retq
  %a = fptrunc <4 x double> %b to <4 x float>
  ret <4 x float> %a
}

define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) {
; NOVLDQ-LABEL: f64to4f32_mask:
; NOVLDQ:       # %bb.0:
; NOVLDQ-NEXT:    vpslld $31, %xmm1, %xmm1
; NOVLDQ-NEXT:    vptestmd %zmm1, %zmm1, %k1
; NOVLDQ-NEXT:    vcvtpd2ps %ymm0, %xmm0
; NOVLDQ-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
; NOVLDQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; NOVLDQ-NEXT:    vzeroupper
; NOVLDQ-NEXT:    retq
;
; VLDQ-LABEL: f64to4f32_mask:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vpslld $31, %xmm1, %xmm1
; VLDQ-NEXT:    vpmovd2m %xmm1, %k1
; VLDQ-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
; VLDQ-NEXT:    vzeroupper
; VLDQ-NEXT:    retq
;
; VLNODQ-LABEL: f64to4f32_mask:
; VLNODQ:       # %bb.0:
; VLNODQ-NEXT:    vpslld $31, %xmm1, %xmm1
; VLNODQ-NEXT:    vptestmd %xmm1, %xmm1, %k1
; VLNODQ-NEXT:    vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
; VLNODQ-NEXT:    vzeroupper
; VLNODQ-NEXT:    retq
;
; DQNOVL-LABEL: f64to4f32_mask:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vpslld $31, %xmm1, %xmm1
; DQNOVL-NEXT:    vpmovd2m %zmm1, %k1
; DQNOVL-NEXT:    vcvtpd2ps %ymm0, %xmm0
; DQNOVL-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
; DQNOVL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; DQNOVL-NEXT:    vzeroupper
; DQNOVL-NEXT:    retq
  %a = fptrunc <4 x double> %b to <4 x float>
  %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer
  ret <4 x float> %c
}

define <4 x float> @f64tof32_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
; ALL-LABEL: f64tof32_inreg:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtsd2ss %xmm0, %xmm1, %xmm0
; ALL-NEXT:    retq
  %ext = extractelement <2 x double> %a0, i32 0
  %cvt = fptrunc double %ext to float
  %res = insertelement <4 x float> %a1, float %cvt, i32 0
  ret <4 x float> %res
}

define <8 x double> @f32to8f64(<8 x float> %b) nounwind {
; ALL-LABEL: f32to8f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtps2pd %ymm0, %zmm0
; ALL-NEXT:    retq
  %a = fpext <8 x float> %b to <8 x double>
  ret <8 x double> %a
}

define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x double> %a1) {
; NOVL-LABEL: f32to4f64_mask:
; NOVL:       # %bb.0:
; NOVL-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; NOVL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NOVL-NEXT:    vcvtps2pd %xmm0, %ymm0
; NOVL-NEXT:    vcmpltpd %zmm2, %zmm1, %k1
; NOVL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
; NOVL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; NOVL-NEXT:    retq
;
; VL-LABEL: f32to4f64_mask:
; VL:       # %bb.0:
; VL-NEXT:    vcmpltpd %ymm2, %ymm1, %k1
; VL-NEXT:    vcvtps2pd %xmm0, %ymm0 {%k1} {z}
; VL-NEXT:    retq
  %a = fpext <4 x float> %b to <4 x double>
  %mask = fcmp ogt <4 x double> %a1, %b1
  %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer
  ret <4 x double> %c
}

define <2 x double> @f32tof64_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
; ALL-LABEL: f32tof64_inreg:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0
; ALL-NEXT:    retq
  %ext = extractelement <4 x float> %a1, i32 0
  %cvt = fpext float %ext to double
  %res = insertelement <2 x double> %a0, double %cvt, i32 0
  ret <2 x double> %res
}

; Scalar load + sitofp tests: the load folds into the memory form of
; vcvtsi2sd/vcvtsi2ss.
define double @sltof64_load(i64* nocapture %e) {
; ALL-LABEL: sltof64_load:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vcvtsi2sdq (%rdi), %xmm0, %xmm0
; ALL-NEXT:    retq
entry:
  %tmp1 = load i64, i64* %e, align 8
  %conv = sitofp i64 %tmp1 to double
  ret double %conv
}

define double @sitof64_load(i32* %e) {
; ALL-LABEL: sitof64_load:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vcvtsi2sdl (%rdi), %xmm0, %xmm0
; ALL-NEXT:    retq
entry:
  %tmp1 = load i32, i32* %e, align 4
  %conv = sitofp i32 %tmp1 to double
  ret double %conv
}

define float @sitof32_load(i32* %e) {
; ALL-LABEL: sitof32_load:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vcvtsi2ssl (%rdi), %xmm0, %xmm0
; ALL-NEXT:    retq
entry:
  %tmp1 = load i32, i32* %e, align 4
  %conv = sitofp i32 %tmp1 to float
  ret float %conv
}

define float @sltof32_load(i64* %e) {
; ALL-LABEL: sltof32_load:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vcvtsi2ssq (%rdi), %xmm0, %xmm0
; ALL-NEXT:    retq
entry:
  %tmp1 = load i64, i64* %e, align 8
  %conv = sitofp i64 %tmp1 to float
  ret float %conv
}

define void @f32tof64_loadstore() {
; ALL-LABEL: f32tof64_loadstore:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; ALL-NEXT:    vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT:    retq
entry:
  %f = alloca float, align 4
  %d = alloca double, align 8
  %tmp = load float, float* %f, align 4
  %conv = fpext float %tmp to double
  store double %conv, double* %d, align 8
  ret void
}

define void @f64tof32_loadstore() nounwind uwtable {
; ALL-LABEL: f64tof32_loadstore:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
; ALL-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT:    retq
entry:
  %f = alloca float, align 4
  %d = alloca double, align 8
  %tmp = load double, double* %d, align 8
  %conv = fptrunc double %tmp to float
  store float %conv, float* %f, align 4
  ret void
}

; Bitcasts between int and fp lower to plain GPR<->XMM moves, not conversions.
define double @long_to_double(i64 %x) {
; ALL-LABEL: long_to_double:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovq %rdi, %xmm0
; ALL-NEXT:    retq
  %res = bitcast i64 %x to double
  ret double %res
}

define i64 @double_to_long(double %x) {
; ALL-LABEL: double_to_long:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovq %xmm0, %rax
; ALL-NEXT:    retq
  %res = bitcast double %x to i64
  ret i64 %res
}

define float @int_to_float(i32 %x) {
; ALL-LABEL: int_to_float:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovd %edi, %xmm0
; ALL-NEXT:    retq
  %res = bitcast i32 %x to float
  ret float %res
}

define i32 @float_to_int(float %x) {
; ALL-LABEL: float_to_int:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovd %xmm0, %eax
; ALL-NEXT:    retq
  %res = bitcast float %x to i32
  ret i32 %res
}

define <16 x double> @uito16f64(<16 x i32> %a) nounwind {
; ALL-LABEL: uito16f64:
; ALL:       # %bb.0:
; ALL-NEXT:    vcvtudq2pd %ymm0, %zmm2
; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT:    vcvtudq2pd %ymm0, %zmm1
; ALL-NEXT:    vmovaps %zmm2, %zmm0
; ALL-NEXT:    retq
  %b = uitofp <16 x i32> %a to <16 x double>
  ret <16 x double> %b
}

define <8 x float> @slto8f32(<8 x i64> %a) {
; NODQ-LABEL: slto8f32:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm2
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm1
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm3
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm2
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm3
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm3
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm0
; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: slto8f32:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: slto8f32:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvtqq2ps %zmm0, %ymm0
; DQNOVL-NEXT:    retq
  %b = sitofp <8 x i64> %a to <8 x float>
  ret <8 x float> %b
}

define <16 x float> @slto16f32(<16 x i64> %a) {
; NODQ-LABEL: slto16f32:
; NODQ:       # %bb.0:
; NODQ-NEXT:    vextracti32x4 $2, %zmm1, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; NODQ-NEXT:    vextracti32x4 $3, %zmm1, %xmm3
; NODQ-NEXT:    vmovq %xmm3, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; NODQ-NEXT:    vextracti128 $1, %ymm1, %xmm1
; NODQ-NEXT:    vmovq %xmm1, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm1
; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovq %xmm2, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm2
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm3
; NODQ-NEXT:    vmovq %xmm3, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm3
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT:    vmovq %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm4
; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm0
; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; NODQ-NEXT:    retq
;
; VLDQ-LABEL: slto16f32:
; VLDQ:       # %bb.0:
; VLDQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
; VLDQ-NEXT:    vcvtqq2ps %zmm1, %ymm1
; VLDQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; VLDQ-NEXT:    retq
;
; DQNOVL-LABEL: slto16f32:
; DQNOVL:       # %bb.0:
; DQNOVL-NEXT:    vcvtqq2ps %zmm0, %ymm0
; DQNOVL-NEXT:    vcvtqq2ps %zmm1, %ymm1
; DQNOVL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; DQNOVL-NEXT:    retq
  %b = sitofp <16 x i64> %a to <16 x float>
  ret <16 x float> %b
}

define <8 x 
double> @slto8f64(<8 x i64> %a) { 1055; NODQ-LABEL: slto8f64: 1056; NODQ: # %bb.0: 1057; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm1 1058; NODQ-NEXT: vpextrq $1, %xmm1, %rax 1059; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 1060; NODQ-NEXT: vmovq %xmm1, %rax 1061; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1 1062; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1063; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2 1064; NODQ-NEXT: vpextrq $1, %xmm2, %rax 1065; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3 1066; NODQ-NEXT: vmovq %xmm2, %rax 1067; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2 1068; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1069; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 1070; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2 1071; NODQ-NEXT: vpextrq $1, %xmm2, %rax 1072; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3 1073; NODQ-NEXT: vmovq %xmm2, %rax 1074; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2 1075; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1076; NODQ-NEXT: vpextrq $1, %xmm0, %rax 1077; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3 1078; NODQ-NEXT: vmovq %xmm0, %rax 1079; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm0 1080; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0] 1081; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1082; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 1083; NODQ-NEXT: retq 1084; 1085; VLDQ-LABEL: slto8f64: 1086; VLDQ: # %bb.0: 1087; VLDQ-NEXT: vcvtqq2pd %zmm0, %zmm0 1088; VLDQ-NEXT: retq 1089; 1090; DQNOVL-LABEL: slto8f64: 1091; DQNOVL: # %bb.0: 1092; DQNOVL-NEXT: vcvtqq2pd %zmm0, %zmm0 1093; DQNOVL-NEXT: retq 1094 %b = sitofp <8 x i64> %a to <8 x double> 1095 ret <8 x double> %b 1096} 1097 1098define <16 x double> @slto16f64(<16 x i64> %a) { 1099; NODQ-LABEL: slto16f64: 1100; NODQ: # %bb.0: 1101; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm2 1102; NODQ-NEXT: vpextrq $1, %xmm2, %rax 1103; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3 1104; NODQ-NEXT: vmovq %xmm2, %rax 1105; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2 1106; NODQ-NEXT: vmovlhps {{.*#+}} 
xmm2 = xmm2[0],xmm3[0] 1107; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm3 1108; NODQ-NEXT: vpextrq $1, %xmm3, %rax 1109; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm4 1110; NODQ-NEXT: vmovq %xmm3, %rax 1111; NODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 1112; NODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] 1113; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 1114; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm3 1115; NODQ-NEXT: vpextrq $1, %xmm3, %rax 1116; NODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 1117; NODQ-NEXT: vmovq %xmm3, %rax 1118; NODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 1119; NODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] 1120; NODQ-NEXT: vpextrq $1, %xmm0, %rax 1121; NODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 1122; NODQ-NEXT: vmovq %xmm0, %rax 1123; NODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm0 1124; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0] 1125; NODQ-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1126; NODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 1127; NODQ-NEXT: vextracti32x4 $3, %zmm1, %xmm2 1128; NODQ-NEXT: vpextrq $1, %xmm2, %rax 1129; NODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 1130; NODQ-NEXT: vmovq %xmm2, %rax 1131; NODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm2 1132; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1133; NODQ-NEXT: vextracti32x4 $2, %zmm1, %xmm3 1134; NODQ-NEXT: vpextrq $1, %xmm3, %rax 1135; NODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 1136; NODQ-NEXT: vmovq %xmm3, %rax 1137; NODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 1138; NODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] 1139; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 1140; NODQ-NEXT: vextracti128 $1, %ymm1, %xmm3 1141; NODQ-NEXT: vpextrq $1, %xmm3, %rax 1142; NODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 1143; NODQ-NEXT: vmovq %xmm3, %rax 1144; NODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 1145; NODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] 1146; NODQ-NEXT: vpextrq $1, %xmm1, %rax 1147; NODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 1148; NODQ-NEXT: vmovq %xmm1, %rax 1149; NODQ-NEXT: vcvtsi2sdq %rax, 
%xmm5, %xmm1 1150; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0] 1151; NODQ-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 1152; NODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1 1153; NODQ-NEXT: retq 1154; 1155; VLDQ-LABEL: slto16f64: 1156; VLDQ: # %bb.0: 1157; VLDQ-NEXT: vcvtqq2pd %zmm0, %zmm0 1158; VLDQ-NEXT: vcvtqq2pd %zmm1, %zmm1 1159; VLDQ-NEXT: retq 1160; 1161; DQNOVL-LABEL: slto16f64: 1162; DQNOVL: # %bb.0: 1163; DQNOVL-NEXT: vcvtqq2pd %zmm0, %zmm0 1164; DQNOVL-NEXT: vcvtqq2pd %zmm1, %zmm1 1165; DQNOVL-NEXT: retq 1166 %b = sitofp <16 x i64> %a to <16 x double> 1167 ret <16 x double> %b 1168} 1169 1170define <8 x float> @ulto8f32(<8 x i64> %a) { 1171; NODQ-LABEL: ulto8f32: 1172; NODQ: # %bb.0: 1173; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm1 1174; NODQ-NEXT: vpextrq $1, %xmm1, %rax 1175; NODQ-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm2 1176; NODQ-NEXT: vmovq %xmm1, %rax 1177; NODQ-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm1 1178; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] 1179; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm2 1180; NODQ-NEXT: vmovq %xmm2, %rax 1181; NODQ-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm3 1182; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3] 1183; NODQ-NEXT: vpextrq $1, %xmm2, %rax 1184; NODQ-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm2 1185; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] 1186; NODQ-NEXT: vpextrq $1, %xmm0, %rax 1187; NODQ-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm2 1188; NODQ-NEXT: vmovq %xmm0, %rax 1189; NODQ-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm3 1190; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 1191; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0 1192; NODQ-NEXT: vmovq %xmm0, %rax 1193; NODQ-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm3 1194; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] 1195; NODQ-NEXT: vpextrq $1, %xmm0, %rax 1196; NODQ-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm0 1197; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0] 1198; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 
1199; NODQ-NEXT: retq 1200; 1201; VLDQ-LABEL: ulto8f32: 1202; VLDQ: # %bb.0: 1203; VLDQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 1204; VLDQ-NEXT: retq 1205; 1206; DQNOVL-LABEL: ulto8f32: 1207; DQNOVL: # %bb.0: 1208; DQNOVL-NEXT: vcvtuqq2ps %zmm0, %ymm0 1209; DQNOVL-NEXT: retq 1210 %b = uitofp <8 x i64> %a to <8 x float> 1211 ret <8 x float> %b 1212} 1213 1214define <16 x float> @ulto16f32(<16 x i64> %a) { 1215; NODQ-LABEL: ulto16f32: 1216; NODQ: # %bb.0: 1217; NODQ-NEXT: vextracti32x4 $2, %zmm1, %xmm2 1218; NODQ-NEXT: vpextrq $1, %xmm2, %rax 1219; NODQ-NEXT: vcvtusi2ssq %rax, %xmm3, %xmm3 1220; NODQ-NEXT: vmovq %xmm2, %rax 1221; NODQ-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm2 1222; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] 1223; NODQ-NEXT: vextracti32x4 $3, %zmm1, %xmm3 1224; NODQ-NEXT: vmovq %xmm3, %rax 1225; NODQ-NEXT: vcvtusi2ssq %rax, %xmm4, %xmm4 1226; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3] 1227; NODQ-NEXT: vpextrq $1, %xmm3, %rax 1228; NODQ-NEXT: vcvtusi2ssq %rax, %xmm5, %xmm3 1229; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] 1230; NODQ-NEXT: vpextrq $1, %xmm1, %rax 1231; NODQ-NEXT: vcvtusi2ssq %rax, %xmm5, %xmm3 1232; NODQ-NEXT: vmovq %xmm1, %rax 1233; NODQ-NEXT: vcvtusi2ssq %rax, %xmm5, %xmm4 1234; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3] 1235; NODQ-NEXT: vextracti128 $1, %ymm1, %xmm1 1236; NODQ-NEXT: vmovq %xmm1, %rax 1237; NODQ-NEXT: vcvtusi2ssq %rax, %xmm5, %xmm4 1238; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] 1239; NODQ-NEXT: vpextrq $1, %xmm1, %rax 1240; NODQ-NEXT: vcvtusi2ssq %rax, %xmm5, %xmm1 1241; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0] 1242; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1243; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2 1244; NODQ-NEXT: vpextrq $1, %xmm2, %rax 1245; NODQ-NEXT: vcvtusi2ssq %rax, %xmm5, %xmm3 1246; NODQ-NEXT: vmovq %xmm2, %rax 1247; NODQ-NEXT: vcvtusi2ssq %rax, %xmm5, %xmm2 1248; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = 
xmm2[0],xmm3[0],xmm2[2,3] 1249; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm3 1250; NODQ-NEXT: vmovq %xmm3, %rax 1251; NODQ-NEXT: vcvtusi2ssq %rax, %xmm5, %xmm4 1252; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3] 1253; NODQ-NEXT: vpextrq $1, %xmm3, %rax 1254; NODQ-NEXT: vcvtusi2ssq %rax, %xmm5, %xmm3 1255; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] 1256; NODQ-NEXT: vpextrq $1, %xmm0, %rax 1257; NODQ-NEXT: vcvtusi2ssq %rax, %xmm5, %xmm3 1258; NODQ-NEXT: vmovq %xmm0, %rax 1259; NODQ-NEXT: vcvtusi2ssq %rax, %xmm5, %xmm4 1260; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3] 1261; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0 1262; NODQ-NEXT: vmovq %xmm0, %rax 1263; NODQ-NEXT: vcvtusi2ssq %rax, %xmm5, %xmm4 1264; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] 1265; NODQ-NEXT: vpextrq $1, %xmm0, %rax 1266; NODQ-NEXT: vcvtusi2ssq %rax, %xmm5, %xmm0 1267; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0] 1268; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1269; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 1270; NODQ-NEXT: retq 1271; 1272; VLDQ-LABEL: ulto16f32: 1273; VLDQ: # %bb.0: 1274; VLDQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 1275; VLDQ-NEXT: vcvtuqq2ps %zmm1, %ymm1 1276; VLDQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 1277; VLDQ-NEXT: retq 1278; 1279; DQNOVL-LABEL: ulto16f32: 1280; DQNOVL: # %bb.0: 1281; DQNOVL-NEXT: vcvtuqq2ps %zmm0, %ymm0 1282; DQNOVL-NEXT: vcvtuqq2ps %zmm1, %ymm1 1283; DQNOVL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 1284; DQNOVL-NEXT: retq 1285 %b = uitofp <16 x i64> %a to <16 x float> 1286 ret <16 x float> %b 1287} 1288 1289define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind { 1290; KNL-LABEL: uito8f64_mask: 1291; KNL: # %bb.0: 1292; KNL-NEXT: kmovw %edi, %k1 1293; KNL-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} 1294; KNL-NEXT: retq 1295; 1296; VLBW-LABEL: uito8f64_mask: 1297; VLBW: # %bb.0: 1298; VLBW-NEXT: kmovd %edi, %k1 1299; VLBW-NEXT: vcvtudq2pd %ymm1, %zmm0 
{%k1} 1300; VLBW-NEXT: retq 1301; 1302; VLNOBW-LABEL: uito8f64_mask: 1303; VLNOBW: # %bb.0: 1304; VLNOBW-NEXT: kmovw %edi, %k1 1305; VLNOBW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} 1306; VLNOBW-NEXT: retq 1307; 1308; DQNOVL-LABEL: uito8f64_mask: 1309; DQNOVL: # %bb.0: 1310; DQNOVL-NEXT: kmovw %edi, %k1 1311; DQNOVL-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} 1312; DQNOVL-NEXT: retq 1313; 1314; AVX512BW-LABEL: uito8f64_mask: 1315; AVX512BW: # %bb.0: 1316; AVX512BW-NEXT: kmovd %edi, %k1 1317; AVX512BW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} 1318; AVX512BW-NEXT: retq 1319 %1 = bitcast i8 %c to <8 x i1> 1320 %2 = uitofp <8 x i32> %b to <8 x double> 1321 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a 1322 ret <8 x double> %3 1323} 1324define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind { 1325; KNL-LABEL: uito8f64_maskz: 1326; KNL: # %bb.0: 1327; KNL-NEXT: kmovw %edi, %k1 1328; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} 1329; KNL-NEXT: retq 1330; 1331; VLBW-LABEL: uito8f64_maskz: 1332; VLBW: # %bb.0: 1333; VLBW-NEXT: kmovd %edi, %k1 1334; VLBW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} 1335; VLBW-NEXT: retq 1336; 1337; VLNOBW-LABEL: uito8f64_maskz: 1338; VLNOBW: # %bb.0: 1339; VLNOBW-NEXT: kmovw %edi, %k1 1340; VLNOBW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} 1341; VLNOBW-NEXT: retq 1342; 1343; DQNOVL-LABEL: uito8f64_maskz: 1344; DQNOVL: # %bb.0: 1345; DQNOVL-NEXT: kmovw %edi, %k1 1346; DQNOVL-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} 1347; DQNOVL-NEXT: retq 1348; 1349; AVX512BW-LABEL: uito8f64_maskz: 1350; AVX512BW: # %bb.0: 1351; AVX512BW-NEXT: kmovd %edi, %k1 1352; AVX512BW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} 1353; AVX512BW-NEXT: retq 1354 %1 = bitcast i8 %b to <8 x i1> 1355 %2 = uitofp <8 x i32> %a to <8 x double> 1356 %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer 1357 ret <8 x double> %3 1358} 1359 1360define <4 x double> @uito4f64(<4 x i32> %a) nounwind { 1361; NOVL-LABEL: uito4f64: 1362; NOVL: # %bb.0: 1363; NOVL-NEXT: # kill: def 
$xmm0 killed $xmm0 def $ymm0 1364; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0 1365; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1366; NOVL-NEXT: retq 1367; 1368; VL-LABEL: uito4f64: 1369; VL: # %bb.0: 1370; VL-NEXT: vcvtudq2pd %xmm0, %ymm0 1371; VL-NEXT: retq 1372 %b = uitofp <4 x i32> %a to <4 x double> 1373 ret <4 x double> %b 1374} 1375 1376define <16 x float> @uito16f32(<16 x i32> %a) nounwind { 1377; ALL-LABEL: uito16f32: 1378; ALL: # %bb.0: 1379; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0 1380; ALL-NEXT: retq 1381 %b = uitofp <16 x i32> %a to <16 x float> 1382 ret <16 x float> %b 1383} 1384 1385define <8 x double> @uito8f64(<8 x i32> %a) { 1386; ALL-LABEL: uito8f64: 1387; ALL: # %bb.0: 1388; ALL-NEXT: vcvtudq2pd %ymm0, %zmm0 1389; ALL-NEXT: retq 1390 %b = uitofp <8 x i32> %a to <8 x double> 1391 ret <8 x double> %b 1392} 1393 1394define <8 x float> @uito8f32(<8 x i32> %a) nounwind { 1395; NOVL-LABEL: uito8f32: 1396; NOVL: # %bb.0: 1397; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1398; NOVL-NEXT: vcvtudq2ps %zmm0, %zmm0 1399; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1400; NOVL-NEXT: retq 1401; 1402; VL-LABEL: uito8f32: 1403; VL: # %bb.0: 1404; VL-NEXT: vcvtudq2ps %ymm0, %ymm0 1405; VL-NEXT: retq 1406 %b = uitofp <8 x i32> %a to <8 x float> 1407 ret <8 x float> %b 1408} 1409 1410define <4 x float> @uito4f32(<4 x i32> %a) nounwind { 1411; NOVL-LABEL: uito4f32: 1412; NOVL: # %bb.0: 1413; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1414; NOVL-NEXT: vcvtudq2ps %zmm0, %zmm0 1415; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1416; NOVL-NEXT: vzeroupper 1417; NOVL-NEXT: retq 1418; 1419; VL-LABEL: uito4f32: 1420; VL: # %bb.0: 1421; VL-NEXT: vcvtudq2ps %xmm0, %xmm0 1422; VL-NEXT: retq 1423 %b = uitofp <4 x i32> %a to <4 x float> 1424 ret <4 x float> %b 1425} 1426 1427define i32 @fptosi(float %a) nounwind { 1428; ALL-LABEL: fptosi: 1429; ALL: # %bb.0: 1430; ALL-NEXT: vcvttss2si %xmm0, %eax 1431; ALL-NEXT: retq 1432 %b = fptosi float %a 
to i32 1433 ret i32 %b 1434} 1435 1436define i32 @fptoui(float %a) nounwind { 1437; ALL-LABEL: fptoui: 1438; ALL: # %bb.0: 1439; ALL-NEXT: vcvttss2usi %xmm0, %eax 1440; ALL-NEXT: retq 1441 %b = fptoui float %a to i32 1442 ret i32 %b 1443} 1444 1445define float @uitof32(i32 %a) nounwind { 1446; ALL-LABEL: uitof32: 1447; ALL: # %bb.0: 1448; ALL-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 1449; ALL-NEXT: retq 1450 %b = uitofp i32 %a to float 1451 ret float %b 1452} 1453 1454define double @uitof64(i32 %a) nounwind { 1455; ALL-LABEL: uitof64: 1456; ALL: # %bb.0: 1457; ALL-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 1458; ALL-NEXT: retq 1459 %b = uitofp i32 %a to double 1460 ret double %b 1461} 1462 1463define <16 x float> @sbto16f32(<16 x i32> %a) { 1464; NODQ-LABEL: sbto16f32: 1465; NODQ: # %bb.0: 1466; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 1467; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 1468; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1469; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0 1470; NODQ-NEXT: retq 1471; 1472; VLDQ-LABEL: sbto16f32: 1473; VLDQ: # %bb.0: 1474; VLDQ-NEXT: vpmovd2m %zmm0, %k0 1475; VLDQ-NEXT: vpmovm2d %k0, %zmm0 1476; VLDQ-NEXT: vcvtdq2ps %zmm0, %zmm0 1477; VLDQ-NEXT: retq 1478; 1479; DQNOVL-LABEL: sbto16f32: 1480; DQNOVL: # %bb.0: 1481; DQNOVL-NEXT: vpmovd2m %zmm0, %k0 1482; DQNOVL-NEXT: vpmovm2d %k0, %zmm0 1483; DQNOVL-NEXT: vcvtdq2ps %zmm0, %zmm0 1484; DQNOVL-NEXT: retq 1485 %mask = icmp slt <16 x i32> %a, zeroinitializer 1486 %1 = sitofp <16 x i1> %mask to <16 x float> 1487 ret <16 x float> %1 1488} 1489 1490define <16 x float> @scto16f32(<16 x i8> %a) { 1491; ALL-LABEL: scto16f32: 1492; ALL: # %bb.0: 1493; ALL-NEXT: vpmovsxbd %xmm0, %zmm0 1494; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 1495; ALL-NEXT: retq 1496 %1 = sitofp <16 x i8> %a to <16 x float> 1497 ret <16 x float> %1 1498} 1499 1500define <16 x float> @ssto16f32(<16 x i16> %a) { 1501; ALL-LABEL: ssto16f32: 1502; ALL: # %bb.0: 1503; ALL-NEXT: vpmovsxwd %ymm0, %zmm0 1504; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 
1505; ALL-NEXT: retq 1506 %1 = sitofp <16 x i16> %a to <16 x float> 1507 ret <16 x float> %1 1508} 1509 1510define <8 x double> @ssto16f64(<8 x i16> %a) { 1511; ALL-LABEL: ssto16f64: 1512; ALL: # %bb.0: 1513; ALL-NEXT: vpmovsxwd %xmm0, %ymm0 1514; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0 1515; ALL-NEXT: retq 1516 %1 = sitofp <8 x i16> %a to <8 x double> 1517 ret <8 x double> %1 1518} 1519 1520define <8 x double> @scto8f64(<8 x i8> %a) { 1521; ALL-LABEL: scto8f64: 1522; ALL: # %bb.0: 1523; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1524; ALL-NEXT: vpslld $24, %ymm0, %ymm0 1525; ALL-NEXT: vpsrad $24, %ymm0, %ymm0 1526; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0 1527; ALL-NEXT: retq 1528 %1 = sitofp <8 x i8> %a to <8 x double> 1529 ret <8 x double> %1 1530} 1531 1532define <16 x double> @scto16f64(<16 x i8> %a) { 1533; ALL-LABEL: scto16f64: 1534; ALL: # %bb.0: 1535; ALL-NEXT: vpmovsxbd %xmm0, %zmm1 1536; ALL-NEXT: vcvtdq2pd %ymm1, %zmm0 1537; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1538; ALL-NEXT: vcvtdq2pd %ymm1, %zmm1 1539; ALL-NEXT: retq 1540 %b = sitofp <16 x i8> %a to <16 x double> 1541 ret <16 x double> %b 1542} 1543 1544define <16 x double> @sbto16f64(<16 x double> %a) { 1545; NODQ-LABEL: sbto16f64: 1546; NODQ: # %bb.0: 1547; NODQ-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1548; NODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0 1549; NODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1 1550; NODQ-NEXT: kunpckbw %k0, %k1, %k1 1551; NODQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} 1552; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0 1553; NODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1554; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm1 1555; NODQ-NEXT: retq 1556; 1557; VLDQ-LABEL: sbto16f64: 1558; VLDQ: # %bb.0: 1559; VLDQ-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1560; VLDQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0 1561; VLDQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1 1562; VLDQ-NEXT: kunpckbw %k0, %k1, %k0 1563; VLDQ-NEXT: vpmovm2d %k0, %zmm1 1564; VLDQ-NEXT: 
vcvtdq2pd %ymm1, %zmm0 1565; VLDQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1566; VLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1 1567; VLDQ-NEXT: retq 1568; 1569; DQNOVL-LABEL: sbto16f64: 1570; DQNOVL: # %bb.0: 1571; DQNOVL-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1572; DQNOVL-NEXT: vcmpltpd %zmm0, %zmm2, %k0 1573; DQNOVL-NEXT: vcmpltpd %zmm1, %zmm2, %k1 1574; DQNOVL-NEXT: kunpckbw %k0, %k1, %k0 1575; DQNOVL-NEXT: vpmovm2d %k0, %zmm1 1576; DQNOVL-NEXT: vcvtdq2pd %ymm1, %zmm0 1577; DQNOVL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1578; DQNOVL-NEXT: vcvtdq2pd %ymm1, %zmm1 1579; DQNOVL-NEXT: retq 1580 %cmpres = fcmp ogt <16 x double> %a, zeroinitializer 1581 %1 = sitofp <16 x i1> %cmpres to <16 x double> 1582 ret <16 x double> %1 1583} 1584 1585define <8 x double> @sbto8f64(<8 x double> %a) { 1586; NOVLDQ-LABEL: sbto8f64: 1587; NOVLDQ: # %bb.0: 1588; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1589; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1 1590; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1591; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0 1592; NOVLDQ-NEXT: retq 1593; 1594; VLDQ-LABEL: sbto8f64: 1595; VLDQ: # %bb.0: 1596; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1597; VLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0 1598; VLDQ-NEXT: vpmovm2d %k0, %ymm0 1599; VLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0 1600; VLDQ-NEXT: retq 1601; 1602; VLNODQ-LABEL: sbto8f64: 1603; VLNODQ: # %bb.0: 1604; VLNODQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1605; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1 1606; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 1607; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} 1608; VLNODQ-NEXT: vcvtdq2pd %ymm0, %zmm0 1609; VLNODQ-NEXT: retq 1610; 1611; DQNOVL-LABEL: sbto8f64: 1612; DQNOVL: # %bb.0: 1613; DQNOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1614; DQNOVL-NEXT: vcmpltpd %zmm0, %zmm1, %k0 1615; DQNOVL-NEXT: vpmovm2d %k0, %zmm0 1616; DQNOVL-NEXT: vcvtdq2pd %ymm0, %zmm0 1617; DQNOVL-NEXT: retq 1618 %cmpres = fcmp ogt <8 x double> %a, zeroinitializer 1619 %1 = sitofp <8 x i1> %cmpres to <8 x double> 1620 ret <8 x double> %1 1621} 
1622 1623define <8 x float> @sbto8f32(<8 x float> %a) { 1624; ALL-LABEL: sbto8f32: 1625; ALL: # %bb.0: 1626; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 1627; ALL-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 1628; ALL-NEXT: vcvtdq2ps %ymm0, %ymm0 1629; ALL-NEXT: retq 1630 %cmpres = fcmp ogt <8 x float> %a, zeroinitializer 1631 %1 = sitofp <8 x i1> %cmpres to <8 x float> 1632 ret <8 x float> %1 1633} 1634 1635define <4 x float> @sbto4f32(<4 x float> %a) { 1636; ALL-LABEL: sbto4f32: 1637; ALL: # %bb.0: 1638; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 1639; ALL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 1640; ALL-NEXT: vcvtdq2ps %xmm0, %xmm0 1641; ALL-NEXT: retq 1642 %cmpres = fcmp ogt <4 x float> %a, zeroinitializer 1643 %1 = sitofp <4 x i1> %cmpres to <4 x float> 1644 ret <4 x float> %1 1645} 1646 1647define <4 x double> @sbto4f64(<4 x double> %a) { 1648; NOVL-LABEL: sbto4f64: 1649; NOVL: # %bb.0: 1650; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1651; NOVL-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 1652; NOVL-NEXT: vpmovqd %zmm0, %ymm0 1653; NOVL-NEXT: vcvtdq2pd %xmm0, %ymm0 1654; NOVL-NEXT: retq 1655; 1656; VLDQ-LABEL: sbto4f64: 1657; VLDQ: # %bb.0: 1658; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1659; VLDQ-NEXT: vcmpltpd %ymm0, %ymm1, %k0 1660; VLDQ-NEXT: vpmovm2d %k0, %xmm0 1661; VLDQ-NEXT: vcvtdq2pd %xmm0, %ymm0 1662; VLDQ-NEXT: retq 1663; 1664; VLNODQ-LABEL: sbto4f64: 1665; VLNODQ: # %bb.0: 1666; VLNODQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1667; VLNODQ-NEXT: vcmpltpd %ymm0, %ymm1, %k1 1668; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 1669; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} 1670; VLNODQ-NEXT: vcvtdq2pd %xmm0, %ymm0 1671; VLNODQ-NEXT: retq 1672 %cmpres = fcmp ogt <4 x double> %a, zeroinitializer 1673 %1 = sitofp <4 x i1> %cmpres to <4 x double> 1674 ret <4 x double> %1 1675} 1676 1677define <2 x float> @sbto2f32(<2 x float> %a) { 1678; ALL-LABEL: sbto2f32: 1679; ALL: # %bb.0: 1680; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 1681; ALL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 1682; ALL-NEXT: vcvtdq2ps %xmm0, %xmm0 
1683; ALL-NEXT: retq 1684 %cmpres = fcmp ogt <2 x float> %a, zeroinitializer 1685 %1 = sitofp <2 x i1> %cmpres to <2 x float> 1686 ret <2 x float> %1 1687} 1688 1689define <2 x double> @sbto2f64(<2 x double> %a) { 1690; ALL-LABEL: sbto2f64: 1691; ALL: # %bb.0: 1692; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 1693; ALL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 1694; ALL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] 1695; ALL-NEXT: vcvtdq2pd %xmm0, %xmm0 1696; ALL-NEXT: retq 1697 %cmpres = fcmp ogt <2 x double> %a, zeroinitializer 1698 %1 = sitofp <2 x i1> %cmpres to <2 x double> 1699 ret <2 x double> %1 1700} 1701 1702define <16 x float> @ucto16f32(<16 x i8> %a) { 1703; ALL-LABEL: ucto16f32: 1704; ALL: # %bb.0: 1705; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 1706; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 1707; ALL-NEXT: retq 1708 %b = uitofp <16 x i8> %a to <16 x float> 1709 ret <16 x float>%b 1710} 1711 1712define <8 x double> @ucto8f64(<8 x i8> %a) { 1713; ALL-LABEL: ucto8f64: 1714; ALL: # %bb.0: 1715; ALL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 1716; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1717; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0 1718; ALL-NEXT: retq 1719 %b = uitofp <8 x i8> %a to <8 x double> 1720 ret <8 x double> %b 1721} 1722 1723define <16 x float> @swto16f32(<16 x i16> %a) { 1724; ALL-LABEL: swto16f32: 1725; ALL: # %bb.0: 1726; ALL-NEXT: vpmovsxwd %ymm0, %zmm0 1727; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 1728; ALL-NEXT: retq 1729 %b = sitofp <16 x i16> %a to <16 x float> 1730 ret <16 x float> %b 1731} 1732 1733define <8 x 
double> @swto8f64(<8 x i16> %a) { 1734; ALL-LABEL: swto8f64: 1735; ALL: # %bb.0: 1736; ALL-NEXT: vpmovsxwd %xmm0, %ymm0 1737; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0 1738; ALL-NEXT: retq 1739 %b = sitofp <8 x i16> %a to <8 x double> 1740 ret <8 x double> %b 1741} 1742 1743define <16 x double> @swto16f64(<16 x i16> %a) { 1744; ALL-LABEL: swto16f64: 1745; ALL: # %bb.0: 1746; ALL-NEXT: vpmovsxwd %ymm0, %zmm1 1747; ALL-NEXT: vcvtdq2pd %ymm1, %zmm0 1748; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1749; ALL-NEXT: vcvtdq2pd %ymm1, %zmm1 1750; ALL-NEXT: retq 1751 %b = sitofp <16 x i16> %a to <16 x double> 1752 ret <16 x double> %b 1753} 1754 1755define <16 x double> @ucto16f64(<16 x i8> %a) { 1756; ALL-LABEL: ucto16f64: 1757; ALL: # %bb.0: 1758; ALL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 1759; ALL-NEXT: vcvtdq2pd %ymm1, %zmm0 1760; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1761; ALL-NEXT: vcvtdq2pd %ymm1, %zmm1 1762; ALL-NEXT: retq 1763 %b = uitofp <16 x i8> %a to <16 x double> 1764 ret <16 x double> %b 1765} 1766 1767define <16 x float> @uwto16f32(<16 x i16> %a) { 1768; ALL-LABEL: uwto16f32: 1769; ALL: # %bb.0: 1770; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 1771; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 1772; ALL-NEXT: retq 1773 %b = uitofp <16 x i16> %a to <16 x float> 1774 ret <16 x float> %b 1775} 1776 1777define <8 x double> @uwto8f64(<8 x i16> %a) { 1778; ALL-LABEL: uwto8f64: 1779; ALL: # %bb.0: 1780; ALL-NEXT: 
vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1781; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0 1782; ALL-NEXT: retq 1783 %b = uitofp <8 x i16> %a to <8 x double> 1784 ret <8 x double> %b 1785} 1786 1787define <16 x double> @uwto16f64(<16 x i16> %a) { 1788; ALL-LABEL: uwto16f64: 1789; ALL: # %bb.0: 1790; ALL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 1791; ALL-NEXT: vcvtdq2pd %ymm1, %zmm0 1792; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1793; ALL-NEXT: vcvtdq2pd %ymm1, %zmm1 1794; ALL-NEXT: retq 1795 %b = uitofp <16 x i16> %a to <16 x double> 1796 ret <16 x double> %b 1797} 1798 1799define <16 x float> @sito16f32(<16 x i32> %a) { 1800; ALL-LABEL: sito16f32: 1801; ALL: # %bb.0: 1802; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 1803; ALL-NEXT: retq 1804 %b = sitofp <16 x i32> %a to <16 x float> 1805 ret <16 x float> %b 1806} 1807 1808define <16 x double> @sito16f64(<16 x i32> %a) { 1809; ALL-LABEL: sito16f64: 1810; ALL: # %bb.0: 1811; ALL-NEXT: vcvtdq2pd %ymm0, %zmm2 1812; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0 1813; ALL-NEXT: vcvtdq2pd %ymm0, %zmm1 1814; ALL-NEXT: vmovaps %zmm2, %zmm0 1815; ALL-NEXT: retq 1816 %b = sitofp <16 x i32> %a to <16 x double> 1817 ret <16 x double> %b 1818} 1819 1820define <16 x float> @usto16f32(<16 x i16> %a) { 1821; ALL-LABEL: usto16f32: 1822; ALL: # %bb.0: 1823; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 1824; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0 1825; ALL-NEXT: retq 1826 %b = uitofp <16 x i16> %a to <16 x float> 1827 ret <16 x float> %b 1828} 1829 1830define <16 x float> 
@ubto16f32(<16 x i32> %a) { 1831; NODQ-LABEL: ubto16f32: 1832; NODQ: # %bb.0: 1833; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 1834; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 1835; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1836; NODQ-NEXT: vpsrld $31, %zmm0, %zmm0 1837; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0 1838; NODQ-NEXT: retq 1839; 1840; VLDQ-LABEL: ubto16f32: 1841; VLDQ: # %bb.0: 1842; VLDQ-NEXT: vpmovd2m %zmm0, %k0 1843; VLDQ-NEXT: vpmovm2d %k0, %zmm0 1844; VLDQ-NEXT: vpsrld $31, %zmm0, %zmm0 1845; VLDQ-NEXT: vcvtdq2ps %zmm0, %zmm0 1846; VLDQ-NEXT: retq 1847; 1848; DQNOVL-LABEL: ubto16f32: 1849; DQNOVL: # %bb.0: 1850; DQNOVL-NEXT: vpmovd2m %zmm0, %k0 1851; DQNOVL-NEXT: vpmovm2d %k0, %zmm0 1852; DQNOVL-NEXT: vpsrld $31, %zmm0, %zmm0 1853; DQNOVL-NEXT: vcvtdq2ps %zmm0, %zmm0 1854; DQNOVL-NEXT: retq 1855 %mask = icmp slt <16 x i32> %a, zeroinitializer 1856 %1 = uitofp <16 x i1> %mask to <16 x float> 1857 ret <16 x float> %1 1858} 1859 1860define <16 x double> @ubto16f64(<16 x i32> %a) { 1861; NODQ-LABEL: ubto16f64: 1862; NODQ: # %bb.0: 1863; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 1864; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 1865; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1866; NODQ-NEXT: vpsrld $31, %zmm0, %zmm1 1867; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0 1868; NODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1869; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm1 1870; NODQ-NEXT: retq 1871; 1872; VLDQ-LABEL: ubto16f64: 1873; VLDQ: # %bb.0: 1874; VLDQ-NEXT: vpmovd2m %zmm0, %k0 1875; VLDQ-NEXT: vpmovm2d %k0, %zmm0 1876; VLDQ-NEXT: vpsrld $31, %zmm0, %zmm1 1877; VLDQ-NEXT: vcvtdq2pd %ymm1, %zmm0 1878; VLDQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1 1879; VLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1 1880; VLDQ-NEXT: retq 1881; 1882; DQNOVL-LABEL: ubto16f64: 1883; DQNOVL: # %bb.0: 1884; DQNOVL-NEXT: vpmovd2m %zmm0, %k0 1885; DQNOVL-NEXT: vpmovm2d %k0, %zmm0 1886; DQNOVL-NEXT: vpsrld $31, %zmm0, %zmm1 1887; DQNOVL-NEXT: vcvtdq2pd %ymm1, %zmm0 1888; DQNOVL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 
1889; DQNOVL-NEXT: vcvtdq2pd %ymm1, %zmm1 1890; DQNOVL-NEXT: retq 1891 %mask = icmp slt <16 x i32> %a, zeroinitializer 1892 %1 = uitofp <16 x i1> %mask to <16 x double> 1893 ret <16 x double> %1 1894} 1895 1896define <8 x float> @ubto8f32(<8 x i32> %a) { 1897; NOVL-LABEL: ubto8f32: 1898; NOVL: # %bb.0: 1899; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 1900; NOVL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 1901; NOVL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216] 1902; NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0 1903; NOVL-NEXT: retq 1904; 1905; VL-LABEL: ubto8f32: 1906; VL: # %bb.0: 1907; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 1908; VL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 1909; VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 1910; VL-NEXT: retq 1911 %mask = icmp slt <8 x i32> %a, zeroinitializer 1912 %1 = uitofp <8 x i1> %mask to <8 x float> 1913 ret <8 x float> %1 1914} 1915 1916define <8 x double> @ubto8f64(<8 x i32> %a) { 1917; ALL-LABEL: ubto8f64: 1918; ALL: # %bb.0: 1919; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1 1920; ALL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 1921; ALL-NEXT: vpsrld $31, %ymm0, %ymm0 1922; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0 1923; ALL-NEXT: retq 1924 %mask = icmp slt <8 x i32> %a, zeroinitializer 1925 %1 = uitofp <8 x i1> %mask to <8 x double> 1926 ret <8 x double> %1 1927} 1928 1929define <4 x float> @ubto4f32(<4 x i32> %a) { 1930; NOVL-LABEL: ubto4f32: 1931; NOVL: # %bb.0: 1932; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 1933; NOVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 1934; NOVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1065353216,1065353216,1065353216,1065353216] 1935; NOVL-NEXT: vpand %xmm1, %xmm0, %xmm0 1936; NOVL-NEXT: retq 1937; 1938; VL-LABEL: ubto4f32: 1939; VL: # %bb.0: 1940; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 1941; VL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 1942; VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 1943; VL-NEXT: retq 1944 %mask = icmp slt <4 x i32> %a, zeroinitializer 1945 %1 = uitofp <4 x i1> %mask 
to <4 x float> 1946 ret <4 x float> %1 1947} 1948 1949define <4 x double> @ubto4f64(<4 x i32> %a) { 1950; ALL-LABEL: ubto4f64: 1951; ALL: # %bb.0: 1952; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1 1953; ALL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 1954; ALL-NEXT: vpsrld $31, %xmm0, %xmm0 1955; ALL-NEXT: vcvtdq2pd %xmm0, %ymm0 1956; ALL-NEXT: retq 1957 %mask = icmp slt <4 x i32> %a, zeroinitializer 1958 %1 = uitofp <4 x i1> %mask to <4 x double> 1959 ret <4 x double> %1 1960} 1961 1962define <2 x float> @ubto2f32(<2 x i32> %a) { 1963; ALL-LABEL: ubto2f32: 1964; ALL: # %bb.0: 1965; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1 1966; ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 1967; ALL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 1968; ALL-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 1969; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1970; ALL-NEXT: retq 1971 %mask = icmp ne <2 x i32> %a, zeroinitializer 1972 %1 = uitofp <2 x i1> %mask to <2 x float> 1973 ret <2 x float> %1 1974} 1975 1976define <2 x double> @ubto2f64(<2 x i32> %a) { 1977; ALL-LABEL: ubto2f64: 1978; ALL: # %bb.0: 1979; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1 1980; ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 1981; ALL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 1982; ALL-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 1983; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1984; ALL-NEXT: vcvtdq2pd %xmm0, %xmm0 1985; ALL-NEXT: retq 1986 %mask = icmp ne <2 x i32> %a, zeroinitializer 1987 %1 = uitofp <2 x i1> %mask to <2 x double> 1988 ret <2 x double> %1 1989} 1990 1991define <2 x i64> @test_2f64toub(<2 x double> %a, <2 x i64> %passthru) { 1992; NOVLDQ-LABEL: test_2f64toub: 1993; NOVLDQ: # %bb.0: 1994; NOVLDQ-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1995; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1996; NOVLDQ-NEXT: vcvttpd2udq %zmm0, %ymm0 1997; NOVLDQ-NEXT: vpslld $31, %ymm0, %ymm0 1998; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 1999; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2000; NOVLDQ-NEXT: 
# kill: def $xmm0 killed $xmm0 killed $zmm0 2001; NOVLDQ-NEXT: vzeroupper 2002; NOVLDQ-NEXT: retq 2003; 2004; VLDQ-LABEL: test_2f64toub: 2005; VLDQ: # %bb.0: 2006; VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0 2007; VLDQ-NEXT: vpslld $31, %xmm0, %xmm0 2008; VLDQ-NEXT: vpmovd2m %xmm0, %k1 2009; VLDQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} 2010; VLDQ-NEXT: retq 2011; 2012; VLNODQ-LABEL: test_2f64toub: 2013; VLNODQ: # %bb.0: 2014; VLNODQ-NEXT: vcvttpd2udq %xmm0, %xmm0 2015; VLNODQ-NEXT: vpslld $31, %xmm0, %xmm0 2016; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 2017; VLNODQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} 2018; VLNODQ-NEXT: retq 2019; 2020; DQNOVL-LABEL: test_2f64toub: 2021; DQNOVL: # %bb.0: 2022; DQNOVL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2023; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2024; DQNOVL-NEXT: vcvttpd2udq %zmm0, %ymm0 2025; DQNOVL-NEXT: vpslld $31, %ymm0, %ymm0 2026; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 2027; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2028; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2029; DQNOVL-NEXT: vzeroupper 2030; DQNOVL-NEXT: retq 2031 %mask = fptoui <2 x double> %a to <2 x i1> 2032 %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer 2033 ret <2 x i64> %select 2034} 2035 2036define <4 x i64> @test_4f64toub(<4 x double> %a, <4 x i64> %passthru) { 2037; NOVLDQ-LABEL: test_4f64toub: 2038; NOVLDQ: # %bb.0: 2039; NOVLDQ-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2040; NOVLDQ-NEXT: vcvttpd2dq %ymm0, %xmm0 2041; NOVLDQ-NEXT: vpslld $31, %xmm0, %xmm0 2042; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 2043; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2044; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2045; NOVLDQ-NEXT: retq 2046; 2047; VLDQ-LABEL: test_4f64toub: 2048; VLDQ: # %bb.0: 2049; VLDQ-NEXT: vcvttpd2dq %ymm0, %xmm0 2050; VLDQ-NEXT: vpslld $31, %xmm0, %xmm0 2051; VLDQ-NEXT: vpmovd2m %xmm0, %k1 2052; VLDQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} 2053; VLDQ-NEXT: retq 2054; 
2055; VLNODQ-LABEL: test_4f64toub: 2056; VLNODQ: # %bb.0: 2057; VLNODQ-NEXT: vcvttpd2dq %ymm0, %xmm0 2058; VLNODQ-NEXT: vpslld $31, %xmm0, %xmm0 2059; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 2060; VLNODQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} 2061; VLNODQ-NEXT: retq 2062; 2063; DQNOVL-LABEL: test_4f64toub: 2064; DQNOVL: # %bb.0: 2065; DQNOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2066; DQNOVL-NEXT: vcvttpd2dq %ymm0, %xmm0 2067; DQNOVL-NEXT: vpslld $31, %xmm0, %xmm0 2068; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 2069; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2070; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2071; DQNOVL-NEXT: retq 2072 %mask = fptoui <4 x double> %a to <4 x i1> 2073 %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer 2074 ret <4 x i64> %select 2075} 2076 2077define <8 x i64> @test_8f64toub(<8 x double> %a, <8 x i64> %passthru) { 2078; NOVLDQ-LABEL: test_8f64toub: 2079; NOVLDQ: # %bb.0: 2080; NOVLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0 2081; NOVLDQ-NEXT: vpslld $31, %ymm0, %ymm0 2082; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 2083; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2084; NOVLDQ-NEXT: retq 2085; 2086; VLDQ-LABEL: test_8f64toub: 2087; VLDQ: # %bb.0: 2088; VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0 2089; VLDQ-NEXT: vpslld $31, %ymm0, %ymm0 2090; VLDQ-NEXT: vpmovd2m %ymm0, %k1 2091; VLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2092; VLDQ-NEXT: retq 2093; 2094; VLNODQ-LABEL: test_8f64toub: 2095; VLNODQ: # %bb.0: 2096; VLNODQ-NEXT: vcvttpd2dq %zmm0, %ymm0 2097; VLNODQ-NEXT: vpslld $31, %ymm0, %ymm0 2098; VLNODQ-NEXT: vptestmd %ymm0, %ymm0, %k1 2099; VLNODQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2100; VLNODQ-NEXT: retq 2101; 2102; DQNOVL-LABEL: test_8f64toub: 2103; DQNOVL: # %bb.0: 2104; DQNOVL-NEXT: vcvttpd2dq %zmm0, %ymm0 2105; DQNOVL-NEXT: vpslld $31, %ymm0, %ymm0 2106; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 2107; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2108; DQNOVL-NEXT: retq 2109 %mask = fptoui <8 x double> %a to 
<8 x i1> 2110 %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer 2111 ret <8 x i64> %select 2112} 2113 2114define <2 x i64> @test_2f32toub(<2 x float> %a, <2 x i64> %passthru) { 2115; NOVLDQ-LABEL: test_2f32toub: 2116; NOVLDQ: # %bb.0: 2117; NOVLDQ-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2118; NOVLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 2119; NOVLDQ-NEXT: vpslld $31, %xmm0, %xmm0 2120; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 2121; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2122; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2123; NOVLDQ-NEXT: vzeroupper 2124; NOVLDQ-NEXT: retq 2125; 2126; VLDQ-LABEL: test_2f32toub: 2127; VLDQ: # %bb.0: 2128; VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 2129; VLDQ-NEXT: vpslld $31, %xmm0, %xmm0 2130; VLDQ-NEXT: vpmovd2m %xmm0, %k1 2131; VLDQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} 2132; VLDQ-NEXT: retq 2133; 2134; VLNODQ-LABEL: test_2f32toub: 2135; VLNODQ: # %bb.0: 2136; VLNODQ-NEXT: vcvttps2dq %xmm0, %xmm0 2137; VLNODQ-NEXT: vpslld $31, %xmm0, %xmm0 2138; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 2139; VLNODQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} 2140; VLNODQ-NEXT: retq 2141; 2142; DQNOVL-LABEL: test_2f32toub: 2143; DQNOVL: # %bb.0: 2144; DQNOVL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2145; DQNOVL-NEXT: vcvttps2dq %xmm0, %xmm0 2146; DQNOVL-NEXT: vpslld $31, %xmm0, %xmm0 2147; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 2148; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2149; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2150; DQNOVL-NEXT: vzeroupper 2151; DQNOVL-NEXT: retq 2152 %mask = fptoui <2 x float> %a to <2 x i1> 2153 %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer 2154 ret <2 x i64> %select 2155} 2156 2157define <4 x i64> @test_4f32toub(<4 x float> %a, <4 x i64> %passthru) { 2158; NOVLDQ-LABEL: test_4f32toub: 2159; NOVLDQ: # %bb.0: 2160; NOVLDQ-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2161; NOVLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 2162; NOVLDQ-NEXT: 
vpslld $31, %xmm0, %xmm0 2163; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 2164; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2165; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2166; NOVLDQ-NEXT: retq 2167; 2168; VLDQ-LABEL: test_4f32toub: 2169; VLDQ: # %bb.0: 2170; VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 2171; VLDQ-NEXT: vpslld $31, %xmm0, %xmm0 2172; VLDQ-NEXT: vpmovd2m %xmm0, %k1 2173; VLDQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} 2174; VLDQ-NEXT: retq 2175; 2176; VLNODQ-LABEL: test_4f32toub: 2177; VLNODQ: # %bb.0: 2178; VLNODQ-NEXT: vcvttps2dq %xmm0, %xmm0 2179; VLNODQ-NEXT: vpslld $31, %xmm0, %xmm0 2180; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 2181; VLNODQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} 2182; VLNODQ-NEXT: retq 2183; 2184; DQNOVL-LABEL: test_4f32toub: 2185; DQNOVL: # %bb.0: 2186; DQNOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2187; DQNOVL-NEXT: vcvttps2dq %xmm0, %xmm0 2188; DQNOVL-NEXT: vpslld $31, %xmm0, %xmm0 2189; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 2190; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2191; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2192; DQNOVL-NEXT: retq 2193 %mask = fptoui <4 x float> %a to <4 x i1> 2194 %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer 2195 ret <4 x i64> %select 2196} 2197 2198define <8 x i64> @test_8f32toub(<8 x float> %a, <8 x i64> %passthru) { 2199; NOVLDQ-LABEL: test_8f32toub: 2200; NOVLDQ: # %bb.0: 2201; NOVLDQ-NEXT: vcvttps2dq %ymm0, %ymm0 2202; NOVLDQ-NEXT: vpslld $31, %ymm0, %ymm0 2203; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 2204; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2205; NOVLDQ-NEXT: retq 2206; 2207; VLDQ-LABEL: test_8f32toub: 2208; VLDQ: # %bb.0: 2209; VLDQ-NEXT: vcvttps2dq %ymm0, %ymm0 2210; VLDQ-NEXT: vpslld $31, %ymm0, %ymm0 2211; VLDQ-NEXT: vpmovd2m %ymm0, %k1 2212; VLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2213; VLDQ-NEXT: retq 2214; 2215; VLNODQ-LABEL: test_8f32toub: 2216; VLNODQ: # %bb.0: 2217; VLNODQ-NEXT: vcvttps2dq %ymm0, %ymm0 
2218; VLNODQ-NEXT: vpslld $31, %ymm0, %ymm0 2219; VLNODQ-NEXT: vptestmd %ymm0, %ymm0, %k1 2220; VLNODQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2221; VLNODQ-NEXT: retq 2222; 2223; DQNOVL-LABEL: test_8f32toub: 2224; DQNOVL: # %bb.0: 2225; DQNOVL-NEXT: vcvttps2dq %ymm0, %ymm0 2226; DQNOVL-NEXT: vpslld $31, %ymm0, %ymm0 2227; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 2228; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2229; DQNOVL-NEXT: retq 2230 %mask = fptoui <8 x float> %a to <8 x i1> 2231 %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer 2232 ret <8 x i64> %select 2233} 2234 2235define <16 x i32> @test_16f32toub(<16 x float> %a, <16 x i32> %passthru) { 2236; NODQ-LABEL: test_16f32toub: 2237; NODQ: # %bb.0: 2238; NODQ-NEXT: vcvttps2dq %zmm0, %zmm0 2239; NODQ-NEXT: vpslld $31, %zmm0, %zmm0 2240; NODQ-NEXT: vptestmd %zmm0, %zmm0, %k1 2241; NODQ-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} 2242; NODQ-NEXT: retq 2243; 2244; VLDQ-LABEL: test_16f32toub: 2245; VLDQ: # %bb.0: 2246; VLDQ-NEXT: vcvttps2dq %zmm0, %zmm0 2247; VLDQ-NEXT: vpslld $31, %zmm0, %zmm0 2248; VLDQ-NEXT: vpmovd2m %zmm0, %k1 2249; VLDQ-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} 2250; VLDQ-NEXT: retq 2251; 2252; DQNOVL-LABEL: test_16f32toub: 2253; DQNOVL: # %bb.0: 2254; DQNOVL-NEXT: vcvttps2dq %zmm0, %zmm0 2255; DQNOVL-NEXT: vpslld $31, %zmm0, %zmm0 2256; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 2257; DQNOVL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} 2258; DQNOVL-NEXT: retq 2259 %mask = fptoui <16 x float> %a to <16 x i1> 2260 %select = select <16 x i1> %mask, <16 x i32> %passthru, <16 x i32> zeroinitializer 2261 ret <16 x i32> %select 2262} 2263 2264define <2 x i64> @test_2f64tosb(<2 x double> %a, <2 x i64> %passthru) { 2265; NOVLDQ-LABEL: test_2f64tosb: 2266; NOVLDQ: # %bb.0: 2267; NOVLDQ-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2268; NOVLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 2269; NOVLDQ-NEXT: vpslld $31, %xmm0, %xmm0 2270; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 2271; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 
{%k1} {z} 2272; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2273; NOVLDQ-NEXT: vzeroupper 2274; NOVLDQ-NEXT: retq 2275; 2276; VLDQ-LABEL: test_2f64tosb: 2277; VLDQ: # %bb.0: 2278; VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 2279; VLDQ-NEXT: vpslld $31, %xmm0, %xmm0 2280; VLDQ-NEXT: vpmovd2m %xmm0, %k1 2281; VLDQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} 2282; VLDQ-NEXT: retq 2283; 2284; VLNODQ-LABEL: test_2f64tosb: 2285; VLNODQ: # %bb.0: 2286; VLNODQ-NEXT: vcvttpd2dq %xmm0, %xmm0 2287; VLNODQ-NEXT: vpslld $31, %xmm0, %xmm0 2288; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 2289; VLNODQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} 2290; VLNODQ-NEXT: retq 2291; 2292; DQNOVL-LABEL: test_2f64tosb: 2293; DQNOVL: # %bb.0: 2294; DQNOVL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2295; DQNOVL-NEXT: vcvttpd2dq %xmm0, %xmm0 2296; DQNOVL-NEXT: vpslld $31, %xmm0, %xmm0 2297; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 2298; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2299; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2300; DQNOVL-NEXT: vzeroupper 2301; DQNOVL-NEXT: retq 2302 %mask = fptosi <2 x double> %a to <2 x i1> 2303 %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer 2304 ret <2 x i64> %select 2305} 2306 2307define <4 x i64> @test_4f64tosb(<4 x double> %a, <4 x i64> %passthru) { 2308; NOVLDQ-LABEL: test_4f64tosb: 2309; NOVLDQ: # %bb.0: 2310; NOVLDQ-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2311; NOVLDQ-NEXT: vcvttpd2dq %ymm0, %xmm0 2312; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 2313; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2314; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2315; NOVLDQ-NEXT: retq 2316; 2317; VLDQ-LABEL: test_4f64tosb: 2318; VLDQ: # %bb.0: 2319; VLDQ-NEXT: vcvttpd2dq %ymm0, %xmm0 2320; VLDQ-NEXT: vpmovd2m %xmm0, %k1 2321; VLDQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} 2322; VLDQ-NEXT: retq 2323; 2324; VLNODQ-LABEL: test_4f64tosb: 2325; VLNODQ: # %bb.0: 2326; VLNODQ-NEXT: vcvttpd2dq %ymm0, %xmm0 2327; VLNODQ-NEXT: 
vptestmd %xmm0, %xmm0, %k1 2328; VLNODQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} 2329; VLNODQ-NEXT: retq 2330; 2331; DQNOVL-LABEL: test_4f64tosb: 2332; DQNOVL: # %bb.0: 2333; DQNOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2334; DQNOVL-NEXT: vcvttpd2dq %ymm0, %xmm0 2335; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 2336; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2337; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2338; DQNOVL-NEXT: retq 2339 %mask = fptosi <4 x double> %a to <4 x i1> 2340 %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer 2341 ret <4 x i64> %select 2342} 2343 2344define <8 x i64> @test_8f64tosb(<8 x double> %a, <8 x i64> %passthru) { 2345; NOVLDQ-LABEL: test_8f64tosb: 2346; NOVLDQ: # %bb.0: 2347; NOVLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0 2348; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 2349; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2350; NOVLDQ-NEXT: retq 2351; 2352; VLDQ-LABEL: test_8f64tosb: 2353; VLDQ: # %bb.0: 2354; VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0 2355; VLDQ-NEXT: vpmovd2m %ymm0, %k1 2356; VLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2357; VLDQ-NEXT: retq 2358; 2359; VLNODQ-LABEL: test_8f64tosb: 2360; VLNODQ: # %bb.0: 2361; VLNODQ-NEXT: vcvttpd2dq %zmm0, %ymm0 2362; VLNODQ-NEXT: vptestmd %ymm0, %ymm0, %k1 2363; VLNODQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2364; VLNODQ-NEXT: retq 2365; 2366; DQNOVL-LABEL: test_8f64tosb: 2367; DQNOVL: # %bb.0: 2368; DQNOVL-NEXT: vcvttpd2dq %zmm0, %ymm0 2369; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 2370; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2371; DQNOVL-NEXT: retq 2372 %mask = fptosi <8 x double> %a to <8 x i1> 2373 %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer 2374 ret <8 x i64> %select 2375} 2376 2377define <2 x i64> @test_2f32tosb(<2 x float> %a, <2 x i64> %passthru) { 2378; NOVLDQ-LABEL: test_2f32tosb: 2379; NOVLDQ: # %bb.0: 2380; NOVLDQ-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2381; NOVLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 2382; 
NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 2383; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2384; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2385; NOVLDQ-NEXT: vzeroupper 2386; NOVLDQ-NEXT: retq 2387; 2388; VLDQ-LABEL: test_2f32tosb: 2389; VLDQ: # %bb.0: 2390; VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 2391; VLDQ-NEXT: vpmovd2m %xmm0, %k1 2392; VLDQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} 2393; VLDQ-NEXT: retq 2394; 2395; VLNODQ-LABEL: test_2f32tosb: 2396; VLNODQ: # %bb.0: 2397; VLNODQ-NEXT: vcvttps2dq %xmm0, %xmm0 2398; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 2399; VLNODQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} 2400; VLNODQ-NEXT: retq 2401; 2402; DQNOVL-LABEL: test_2f32tosb: 2403; DQNOVL: # %bb.0: 2404; DQNOVL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2405; DQNOVL-NEXT: vcvttps2dq %xmm0, %xmm0 2406; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 2407; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2408; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2409; DQNOVL-NEXT: vzeroupper 2410; DQNOVL-NEXT: retq 2411 %mask = fptosi <2 x float> %a to <2 x i1> 2412 %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer 2413 ret <2 x i64> %select 2414} 2415 2416define <4 x i64> @test_4f32tosb(<4 x float> %a, <4 x i64> %passthru) { 2417; NOVLDQ-LABEL: test_4f32tosb: 2418; NOVLDQ: # %bb.0: 2419; NOVLDQ-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2420; NOVLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 2421; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 2422; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2423; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2424; NOVLDQ-NEXT: retq 2425; 2426; VLDQ-LABEL: test_4f32tosb: 2427; VLDQ: # %bb.0: 2428; VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 2429; VLDQ-NEXT: vpmovd2m %xmm0, %k1 2430; VLDQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} 2431; VLDQ-NEXT: retq 2432; 2433; VLNODQ-LABEL: test_4f32tosb: 2434; VLNODQ: # %bb.0: 2435; VLNODQ-NEXT: vcvttps2dq %xmm0, %xmm0 2436; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 2437; VLNODQ-NEXT: 
vmovdqa64 %ymm1, %ymm0 {%k1} {z} 2438; VLNODQ-NEXT: retq 2439; 2440; DQNOVL-LABEL: test_4f32tosb: 2441; DQNOVL: # %bb.0: 2442; DQNOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2443; DQNOVL-NEXT: vcvttps2dq %xmm0, %xmm0 2444; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 2445; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2446; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2447; DQNOVL-NEXT: retq 2448 %mask = fptosi <4 x float> %a to <4 x i1> 2449 %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer 2450 ret <4 x i64> %select 2451} 2452 2453define <8 x i64> @test_8f32tosb(<8 x float> %a, <8 x i64> %passthru) { 2454; NOVLDQ-LABEL: test_8f32tosb: 2455; NOVLDQ: # %bb.0: 2456; NOVLDQ-NEXT: vcvttps2dq %ymm0, %ymm0 2457; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 2458; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2459; NOVLDQ-NEXT: retq 2460; 2461; VLDQ-LABEL: test_8f32tosb: 2462; VLDQ: # %bb.0: 2463; VLDQ-NEXT: vcvttps2dq %ymm0, %ymm0 2464; VLDQ-NEXT: vpmovd2m %ymm0, %k1 2465; VLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2466; VLDQ-NEXT: retq 2467; 2468; VLNODQ-LABEL: test_8f32tosb: 2469; VLNODQ: # %bb.0: 2470; VLNODQ-NEXT: vcvttps2dq %ymm0, %ymm0 2471; VLNODQ-NEXT: vptestmd %ymm0, %ymm0, %k1 2472; VLNODQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2473; VLNODQ-NEXT: retq 2474; 2475; DQNOVL-LABEL: test_8f32tosb: 2476; DQNOVL: # %bb.0: 2477; DQNOVL-NEXT: vcvttps2dq %ymm0, %ymm0 2478; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 2479; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} 2480; DQNOVL-NEXT: retq 2481 %mask = fptosi <8 x float> %a to <8 x i1> 2482 %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer 2483 ret <8 x i64> %select 2484} 2485 2486define <16 x i32> @test_16f32tosb(<16 x float> %a, <16 x i32> %passthru) { 2487; NODQ-LABEL: test_16f32tosb: 2488; NODQ: # %bb.0: 2489; NODQ-NEXT: vcvttps2dq %zmm0, %zmm0 2490; NODQ-NEXT: vptestmd %zmm0, %zmm0, %k1 2491; NODQ-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} 2492; NODQ-NEXT: retq 2493; 
2494; VLDQ-LABEL: test_16f32tosb: 2495; VLDQ: # %bb.0: 2496; VLDQ-NEXT: vcvttps2dq %zmm0, %zmm0 2497; VLDQ-NEXT: vpmovd2m %zmm0, %k1 2498; VLDQ-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} 2499; VLDQ-NEXT: retq 2500; 2501; DQNOVL-LABEL: test_16f32tosb: 2502; DQNOVL: # %bb.0: 2503; DQNOVL-NEXT: vcvttps2dq %zmm0, %zmm0 2504; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 2505; DQNOVL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} 2506; DQNOVL-NEXT: retq 2507 %mask = fptosi <16 x float> %a to <16 x i1> 2508 %select = select <16 x i1> %mask, <16 x i32> %passthru, <16 x i32> zeroinitializer 2509 ret <16 x i32> %select 2510} 2511