; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vldq-builtins.c

; Signed i64 -> double conversions (sitofp -> vcvtqq2pd), 128-bit vectors.

define <2 x double> @test_mm_cvtepi64_pd(<2 x i64> %__A) {
; CHECK-LABEL: test_mm_cvtepi64_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vcvtqq2pd %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %conv.i = sitofp <2 x i64> %__A to <2 x double>
  ret <2 x double> %conv.i
}

define <2 x double> @test_mm_mask_cvtepi64_pd(<2 x double> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_cvtepi64_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtqq2pd %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_cvtepi64_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vcvtqq2pd %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %conv.i.i = sitofp <2 x i64> %__A to <2 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> %__W
  ret <2 x double> %1
}

define <2 x double> @test_mm_maskz_cvtepi64_pd(i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_maskz_cvtepi64_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtqq2pd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_cvtepi64_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vcvtqq2pd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %conv.i.i = sitofp <2 x i64> %__A to <2 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> zeroinitializer
  ret <2 x double> %1
}

; Signed i64 -> double conversions (sitofp -> vcvtqq2pd), 256-bit vectors.

define <4 x double> @test_mm256_cvtepi64_pd(<4 x i64> %__A) {
; CHECK-LABEL: test_mm256_cvtepi64_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vcvtqq2pd %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %conv.i = sitofp <4 x i64> %__A to <4 x double>
  ret <4 x double> %conv.i
}

define <4 x double> @test_mm256_mask_cvtepi64_pd(<4 x double> %__W, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_cvtepi64_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtqq2pd %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_cvtepi64_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vcvtqq2pd %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %conv.i.i = sitofp <4 x i64> %__A to <4 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> %__W
  ret <4 x double> %1
}

define <4 x double> @test_mm256_maskz_cvtepi64_pd(i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_cvtepi64_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtqq2pd %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_cvtepi64_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vcvtqq2pd %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %conv.i.i = sitofp <4 x i64> %__A to <4 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> zeroinitializer
  ret <4 x double> %1
}

; Unsigned i64 -> double conversions (uitofp -> vcvtuqq2pd), 128-bit vectors.

define <2 x double> @test_mm_cvtepu64_pd(<2 x i64> %__A) {
; CHECK-LABEL: test_mm_cvtepu64_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vcvtuqq2pd %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %conv.i = uitofp <2 x i64> %__A to <2 x double>
  ret <2 x double> %conv.i
}

define <2 x double> @test_mm_mask_cvtepu64_pd(<2 x double> %__W, i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_mask_cvtepu64_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtuqq2pd %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_cvtepu64_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vcvtuqq2pd %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %conv.i.i = uitofp <2 x i64> %__A to <2 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> %__W
  ret <2 x double> %1
}

define <2 x double> @test_mm_maskz_cvtepu64_pd(i8 zeroext %__U, <2 x i64> %__A) {
; X86-LABEL: test_mm_maskz_cvtepu64_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtuqq2pd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_cvtepu64_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vcvtuqq2pd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %conv.i.i = uitofp <2 x i64> %__A to <2 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %1 = select <2 x i1> %extract.i, <2 x double> %conv.i.i, <2 x double> zeroinitializer
  ret <2 x double> %1
}

; Unsigned i64 -> double conversions (uitofp -> vcvtuqq2pd), 256-bit vectors.

define <4 x double> @test_mm256_cvtepu64_pd(<4 x i64> %__A) {
; CHECK-LABEL: test_mm256_cvtepu64_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vcvtuqq2pd %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %conv.i = uitofp <4 x i64> %__A to <4 x double>
  ret <4 x double> %conv.i
}

define <4 x double> @test_mm256_mask_cvtepu64_pd(<4 x double> %__W, i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_cvtepu64_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtuqq2pd %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_cvtepu64_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vcvtuqq2pd %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %conv.i.i = uitofp <4 x i64> %__A to <4 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> %__W
  ret <4 x double> %1
}

define <4 x double> @test_mm256_maskz_cvtepu64_pd(i8 zeroext %__U, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_cvtepu64_pd:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vcvtuqq2pd %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_cvtepu64_pd:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vcvtuqq2pd %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %conv.i.i = uitofp <4 x i64> %__A to <4 x double>
  %0 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %1 = select <4 x i1> %extract.i, <4 x double> %conv.i.i, <4 x double> zeroinitializer
  ret <4 x double> %1
}

; FP-class tests (vfpclasspd/vfpclassps) with immediate 2; the <N x i1> result
; is widened to <8 x i1> and bitcast to the i8 mask return value.

define zeroext i8 @test_mm_mask_fpclass_pd_mask(i8 zeroext %__U, <2 x double> %__A) {
; X86-LABEL: test_mm_mask_fpclass_pd_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vfpclasspd $2, %xmm0, %k0 {%k1}
; X86-NEXT:    kmovw %k0, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_fpclass_pd_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vfpclasspd $2, %xmm0, %k0 {%k1}
; X64-NEXT:    kmovw %k0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
entry:
  %0 = tail call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %__A, i32 2)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = and <2 x i1> %0, %extract
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32)

define zeroext i8 @test_mm_fpclass_pd_mask(<2 x double> %__A) {
; CHECK-LABEL: test_mm_fpclass_pd_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %__A, i32 2)
  %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}

define zeroext i8 @test_mm256_mask_fpclass_pd_mask(i8 zeroext %__U, <4 x double> %__A) {
; X86-LABEL: test_mm256_mask_fpclass_pd_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1}
; X86-NEXT:    kmovw %k0, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_fpclass_pd_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1}
; X64-NEXT:    kmovw %k0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %__A, i32 2)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = and <4 x i1> %0, %extract
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32)

define zeroext i8 @test_mm256_fpclass_pd_mask(<4 x double> %__A) {
; CHECK-LABEL: test_mm256_fpclass_pd_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %__A, i32 2)
  %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}

define zeroext i8 @test_mm_mask_fpclass_ps_mask(i8 zeroext %__U, <4 x float> %__A) {
; X86-LABEL: test_mm_mask_fpclass_ps_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1}
; X86-NEXT:    kmovw %k0, %eax
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_fpclass_ps_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1}
; X64-NEXT:    kmovw %k0, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
entry:
  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %__A, i32 2)
  %1 = bitcast i8 %__U to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = and <4 x i1> %0, %extract
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32)

define zeroext i8 @test_mm_fpclass_ps_mask(<4 x float> %__A) {
; CHECK-LABEL: test_mm_fpclass_ps_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %__A, i32 2)
  %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}

; Note: the 256-bit ps mask result is already <8 x i1>, so the masked variant
; lowers the i8 AND directly (andb) instead of using a {%k1} write-mask.

define zeroext i8 @test_mm256_mask_fpclass_ps_mask(i8 zeroext %__U, <8 x float> %__A) {
; X86-LABEL: test_mm256_mask_fpclass_ps_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vfpclassps $2, %ymm0, %k0
; X86-NEXT:    kmovw %k0, %eax
; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_fpclass_ps_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vfpclassps $2, %ymm0, %k0
; X64-NEXT:    kmovw %k0, %eax
; X64-NEXT:    andb %dil, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %__A, i32 2)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = and <8 x i1> %0, %1
  %3 = bitcast <8 x i1> %2 to i8
  ret i8 %3
}

declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32)

define zeroext i8 @test_mm256_fpclass_ps_mask(<8 x float> %__A) {
; CHECK-LABEL: test_mm256_fpclass_ps_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %__A, i32 2)
  %1 = bitcast <8 x i1> %0 to i8
  ret i8 %1
}