; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=SKX

; Codegen tests for vector arithmetic on x86-64 with AVX-512. Each function is
; compiled under five feature sets (see the llc invocations above); expectations
; shared by all of them use the common prefix, the rest are listed per feature set.

; 512-bit double add, register operands.
define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}

; 512-bit double add with a non-splat constant; expected as a RIP-relative memory operand.
define <8 x double> @addpd512fold(<8 x double> %y) {
; CHECK-LABEL: addpd512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}

; 512-bit float add, register operands.
define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: addps512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}

; 512-bit float add with a non-splat constant; expected as a RIP-relative memory operand.
define <16 x float> @addps512fold(<16 x float> %y) {
; CHECK-LABEL: addps512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}

; 512-bit double subtract, register operands.
define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: subpd512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsubpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}

; 512-bit double subtract with the loaded operand folded into the instruction.
define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; CHECK-LABEL: subpd512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <8 x double>, <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}

; 512-bit float subtract, register operands.
define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: subps512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsubps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}

; 512-bit float subtract with the loaded operand folded into the instruction.
define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; CHECK-LABEL: subps512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <16 x float>, <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}

; 512-bit i64 multiply. Only the feature sets that include avx512dq expect a
; single vpmullq; the others expect the vpmuludq/shift/add expansion.
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; AVX512F-LABEL: imulq512:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlq $32, %zmm1, %zmm2
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm2, %zmm2
; AVX512F-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512F-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512F-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
; AVX512F-NEXT:    vpsllq $32, %zmm2, %zmm2
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq512:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq $32, %zmm1, %zmm2
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm2, %zmm2
; AVX512VL-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512VL-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512VL-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
; AVX512VL-NEXT:    vpsllq $32, %zmm2, %zmm2
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512VL-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq512:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlq $32, %zmm1, %zmm2
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm2, %zmm2
; AVX512BW-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512BW-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512BW-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT:    vpsllq $32, %zmm2, %zmm2
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq512:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq512:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %z = mul <8 x i64>%x, %y
  ret <8 x i64>%z
}

; 256-bit i64 multiply. With avx512dq alone the expected code widens to zmm for
; vpmullq; with dq+vl the ymm form is expected; otherwise the expansion.
define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
; AVX512F-LABEL: imulq256:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlq $32, %ymm1, %ymm2
; AVX512F-NEXT:    vpmuludq %ymm0, %ymm2, %ymm2
; AVX512F-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512F-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512F-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpsllq $32, %ymm2, %ymm2
; AVX512F-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq $32, %ymm1, %ymm2
; AVX512VL-NEXT:    vpmuludq %ymm0, %ymm2, %ymm2
; AVX512VL-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512VL-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512VL-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpsllq $32, %ymm2, %ymm2
; AVX512VL-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq256:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlq $32, %ymm1, %ymm2
; AVX512BW-NEXT:    vpmuludq %ymm0, %ymm2, %ymm2
; AVX512BW-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512BW-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512BW-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
; AVX512BW-NEXT:    vpsllq $32, %ymm2, %ymm2
; AVX512BW-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512BW-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq256:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq256:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullq %ymm0, %ymm1, %ymm0
; SKX-NEXT:    retq
  %z = mul <4 x i64>%x, %y
  ret <4 x i64>%z
}

; 128-bit i64 multiply. With avx512dq alone the expected code widens to zmm
; (followed by vzeroupper); with dq+vl the xmm form is expected.
define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
; AVX512F-LABEL: imulq128:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlq $32, %xmm1, %xmm2
; AVX512F-NEXT:    vpmuludq %xmm0, %xmm2, %xmm2
; AVX512F-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512F-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512F-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; AVX512F-NEXT:    vpsllq $32, %xmm2, %xmm2
; AVX512F-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq128:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq $32, %xmm1, %xmm2
; AVX512VL-NEXT:    vpmuludq %xmm0, %xmm2, %xmm2
; AVX512VL-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512VL-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512VL-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; AVX512VL-NEXT:    vpsllq $32, %xmm2, %xmm2
; AVX512VL-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq128:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlq $32, %xmm1, %xmm2
; AVX512BW-NEXT:    vpmuludq %xmm0, %xmm2, %xmm2
; AVX512BW-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512BW-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512BW-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT:    vpsllq $32, %xmm2, %xmm2
; AVX512BW-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq128:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq128:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullq %xmm0, %xmm1, %xmm0
; SKX-NEXT:    retq
  %z = mul <2 x i64>%x, %y
  ret <2 x i64>%z
}

; 512-bit double multiply, register operands.
define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: mulpd512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}

; 512-bit double multiply by a constant vector; expected as a RIP-relative memory operand.
define <8 x double> @mulpd512fold(<8 x double> %y) {
; CHECK-LABEL: mulpd512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}

; 512-bit float multiply, register operands.
define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: mulps512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}

; 512-bit float multiply by a constant vector; expected as a RIP-relative memory operand.
define <16 x float> @mulps512fold(<16 x float> %y) {
; CHECK-LABEL: mulps512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}

; 512-bit double divide, register operands.
define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: divpd512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vdivpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}

; 512-bit double divide by a constant vector; expected as a RIP-relative memory operand.
define <8 x double> @divpd512fold(<8 x double> %y) {
; CHECK-LABEL: divpd512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}

; 512-bit float divide, register operands.
define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: divps512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vdivps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}

; 512-bit float divide by a constant vector; expected as a RIP-relative memory operand.
define <16 x float> @divps512fold(<16 x float> %y) {
; CHECK-LABEL: divps512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}

; 512-bit i64 add, register operands.
define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}

; 512-bit i64 add with the loaded operand folded into the instruction.
define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <8 x i64>, <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}

; 512-bit i64 add of a splat constant; expected as a {1to8} embedded broadcast.
define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq_broadcast_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %x
}

; 512-bit i64 add of a scalar load splatted via an insertelement chain; expected
; as a {1to8} embedded broadcast from memory.
define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; CHECK-LABEL: vpaddq_broadcast2_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load i64, i64* %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}

; 512-bit i32 add, register operands.
define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}

; 512-bit i32 add with the loaded operand folded into the instruction.
define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <16 x i32>, <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}

; 512-bit i32 add of a splat constant; expected as a {1to16} embedded broadcast.
define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd_broadcast_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i32> %x
}

; Merge-masked i32 add: lanes where %mask1 is zero keep %i.
define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; Zero-masked i32 add: lanes where %mask1 is zero become zero.
define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; Merge-masked i32 add with the loaded operand folded into the instruction.
; Not marked readnone: the body loads through %j.ptr.
define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind {
; CHECK-LABEL: vpaddd_mask_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; Merge-masked i32 add of a splat constant; expected as a masked {1to16} broadcast.
define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_broadcast_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

; Zero-masked i32 add with the loaded operand folded into the instruction.
; Not marked readnone: the body loads through %j.ptr.
define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind {
; CHECK-LABEL: vpaddd_maskz_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; Zero-masked i32 add of a splat constant; expected as a zero-masked {1to16} broadcast.
define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

; 512-bit i64 subtract, register operands.
define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}

; 512-bit i32 subtract, register operands.
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}

; 512-bit i32 multiply, register operands.
define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; CHECK-LABEL: vpmulld_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}

; Tail call to the readnone libm sqrtf; expected to become an inline vsqrtss.
declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

; Tail call to the readnone libm sqrt; expected to become an inline vsqrtsd.
declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

; Scalar float square root through the llvm.sqrt intrinsic.
declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
; CHECK-LABEL: sqrtC:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

; 512-bit float square root through the llvm.sqrt intrinsic.
declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; CHECK-LABEL: sqrtD:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}

; 512-bit double square root through the llvm.sqrt intrinsic.
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; CHECK-LABEL: sqrtE:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtpd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}

; 512-bit float add of a splat constant; expected as a {1to16} embedded broadcast.
define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; CHECK-LABEL: fadd_broadcast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}

; 512-bit i64 add of a splat constant; expected as a {1to8} embedded broadcast.
define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: addq_broadcast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

; 512-bit i64 OR with a splat constant. Feature sets including avx512dq expect
; the floating-point form vorpd; the others expect vporq.
define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; AVX512F-LABEL: orq_broadcast:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: orq_broadcast:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: orq_broadcast:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: orq_broadcast:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: orq_broadcast:
; SKX:       # %bb.0:
; SKX-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT:    retq
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

; 512-bit i32 AND with a folded load. Feature sets including avx512dq expect
; vandps; the others expect vpandq.
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; AVX512F-LABEL: andd512fold:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpandq (%rdi), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: andd512fold:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpandq (%rdi), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: andd512fold:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpandq (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: andd512fold:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vandps (%rdi), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: andd512fold:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vandps (%rdi), %zmm0, %zmm0
; SKX-NEXT:    retq
entry:
  %a = load <16 x i32>, <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

; 512-bit i64 AND with a scalar load splatted by shufflevector; expected as a
; {1to8} broadcast (vandpd with avx512dq, vpandq otherwise).
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; AVX512F-LABEL: andqbrst:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: andqbrst:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: andqbrst:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: andqbrst:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: andqbrst:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0
; SKX-NEXT:    retq
entry:
  %a = load i64, i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64>%d
}

; Merge-masked float add: lanes where %mask1 is zero keep %dst.
define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vaddps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vaddps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fadd <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; Merge-masked float multiply: lanes where %mask1 is zero keep %dst.
define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmulps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vmulps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fmul <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; Merge-masked float minimum, written as fcmp olt + select.
define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vminps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vminps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <16 x float> %i, %j
  %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
  ret <16 x float> %r
}

; Merge-masked double minimum with an <8 x i32> mask source. Without avx512vl
; the mask test is expected on the widened zmm register; with it, on ymm.
define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vminpd:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512F-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vminpd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestmd %ymm3, %ymm3, %k1
; AVX512VL-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vminpd:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512BW-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512BW-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vminpd:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512DQ-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512DQ-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vminpd:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %ymm3, %ymm3, %k1
; SKX-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x double> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
  ret <8 x double> %r
}

; Merge-masked float maximum, written as fcmp ogt + select.
define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmaxps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vmaxps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <16 x float> %i, %j
  %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
  ret <16 x float> %r
}

; Merge-masked double maximum with an <8 x i32> mask source. Without avx512vl
; the mask test is expected on the widened zmm register; with it, on ymm.
define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vmaxpd:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512F-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vmaxpd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestmd %ymm3, %ymm3, %k1
; AVX512VL-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vmaxpd:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512BW-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512BW-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vmaxpd:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512DQ-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512DQ-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vmaxpd:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %ymm3, %ymm3, %k1
; SKX-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x double> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
  ret <8 x double> %r
}

; Merge-masked float subtract: lanes where %mask1 is zero keep %dst.
define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vsubps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vsubps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fsub <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; Merge-masked float divide: lanes where %mask1 is zero keep %dst.
define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vdivps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vdivps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fdiv <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

; Merge-masked double add: lanes where %mask1 is zero keep %dst.
define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm3, %zmm3, %k1
; CHECK-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <8 x double> %j, <8 x i64> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

; Zero-masked double add: lanes where %mask1 is zero become zero.
define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
; CHECK-LABEL: test_maskz_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm2, %zmm2, %k1
; CHECK-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                      <8 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

; Merge-masked double add with the loaded operand folded into the instruction.
define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_fold_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm2, %zmm2, %k1
; CHECK-NEXT:    vaddpd (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <8 x double>* %j, <8 x i64> %mask1)
                                     nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

; Zero-masked double add with the loaded operand folded into the instruction.
define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
; CHECK-LABEL: test_maskz_fold_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                      <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

; Double add of a scalar load splatted by shufflevector; expected as a {1to8}
; embedded broadcast from memory.
define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
; CHECK-LABEL: test_broadcast_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  ret <8 x double> %x
}

; Merge-masked double add of a broadcast scalar load.
; NOTE(review): %dst is never used; the select passes through %i, which is why
; the expected code writes zmm1 and then copies it to zmm0. Confirm whether
; %dst was the intended pass-through before regenerating the assertions.
define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_broadcast_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm2, %zmm2, %k1
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    retq
                                      double* %j, <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
  ret <8 x double> %r
}

; Zero-masked double add of a broadcast scalar load.
define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
; CHECK-LABEL: test_maskz_broadcast_vaddpd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmq %zmm1, %zmm1, %k1
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                       <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

; 512-bit float negation written as (-0.0 - %a); expected as an XOR with a
; broadcast constant (vxorps with avx512dq, vpxord otherwise).
define <16 x float> @test_fxor(<16 x float> %a) {
; AVX512F-LABEL: test_fxor:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_fxor:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_fxor:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_fxor:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_fxor:
; SKX:       # %bb.0:
; SKX-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT:    retq

  %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <16 x float>%res
}

; 256-bit float negation written as (-0.0 - %a). Without avx512vl the constant
; is expected in a register via vbroadcastss; with it, as a {1to8} operand.
define <8 x float> @test_fxor_8f32(<8 x float> %a) {
; AVX512F-LABEL: test_fxor_8f32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-0,-0,-0,-0,-0,-0,-0,-0]
; AVX512F-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_fxor_8f32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxord {{.*}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_fxor_8f32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-0,-0,-0,-0,-0,-0,-0,-0]
; AVX512BW-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_fxor_8f32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vbroadcastss {{.*#+}} ymm1 = [-0,-0,-0,-0,-0,-0,-0,-0]
; AVX512DQ-NEXT:    vxorps %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_fxor_8f32:
; SKX:       # %bb.0:
; SKX-NEXT:    vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; SKX-NEXT:    retq
  %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <8 x float>%res
}

; 512-bit double absolute value through llvm.fabs; expected as an AND with a
; broadcast constant (vandpd with avx512dq, vpandq otherwise).
define <8 x double> @fabs_v8f64(<8 x double> %p)
; AVX512F-LABEL: fabs_v8f64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: fabs_v8f64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: fabs_v8f64:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: fabs_v8f64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: fabs_v8f64:
; SKX:       # %bb.0:
; SKX-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT:    retq
{
  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
declare <8 x double>
@llvm.fabs.v8f64(<8 x double> %p) 1031 1032define <16 x float> @fabs_v16f32(<16 x float> %p) 1033; AVX512F-LABEL: fabs_v16f32: 1034; AVX512F: # %bb.0: 1035; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0 1036; AVX512F-NEXT: retq 1037; 1038; AVX512VL-LABEL: fabs_v16f32: 1039; AVX512VL: # %bb.0: 1040; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0 1041; AVX512VL-NEXT: retq 1042; 1043; AVX512BW-LABEL: fabs_v16f32: 1044; AVX512BW: # %bb.0: 1045; AVX512BW-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0 1046; AVX512BW-NEXT: retq 1047; 1048; AVX512DQ-LABEL: fabs_v16f32: 1049; AVX512DQ: # %bb.0: 1050; AVX512DQ-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 1051; AVX512DQ-NEXT: retq 1052; 1053; SKX-LABEL: fabs_v16f32: 1054; SKX: # %bb.0: 1055; SKX-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 1056; SKX-NEXT: retq 1057{ 1058 %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p) 1059 ret <16 x float> %t 1060} 1061declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p) 1062