; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck --check-prefix=CHECK --check-prefix=AVX512DQ %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq -mattr=+avx512bw -mattr=+avx512vl| FileCheck --check-prefix=CHECK --check-prefix=SKX %s

; Basic AVX-512 arithmetic codegen tests (fp add/sub/mul/div, integer
; add/sub/mul, sqrt, fabs, fneg-via-xor), including memory-operand folding,
; embedded broadcasts, and {%k1} / {%k1}{z} masked variants.
; NOTE(review): the CHECK lines are machine-generated; do not hand-edit them —
; rerun utils/update_llc_test_checks.py after changing the IR.

define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}

define <8 x double> @addpd512fold(<8 x double> %y) {
; CHECK-LABEL: addpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}

define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: addps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}

define <16 x float> @addps512fold(<16 x float> %y) {
; CHECK-LABEL: addps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}

define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: subpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}

define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; CHECK-LABEL: subpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <8 x double>, <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}

define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: subps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}

define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; CHECK-LABEL: subps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <16 x float>, <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}

; v8i64 multiply: without AVX512DQ this is expanded into vpmuludq partial
; products; with +avx512dq (AVX512DQ/SKX prefixes) it selects vpmullq.
define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; AVX512F-LABEL: imulq512:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2
; AVX512F-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512F-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512F-NEXT:    vpsllq $32, %zmm3, %zmm3
; AVX512F-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
; AVX512F-NEXT:    vpsrlq $32, %zmm1, %zmm1
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    vpsllq $32, %zmm0, %zmm0
; AVX512F-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq512:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2
; AVX512VL-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512VL-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512VL-NEXT:    vpsllq $32, %zmm3, %zmm3
; AVX512VL-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
; AVX512VL-NEXT:    vpsrlq $32, %zmm1, %zmm1
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512VL-NEXT:    vpsllq $32, %zmm0, %zmm0
; AVX512VL-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2
; AVX512BW-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512BW-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512BW-NEXT:    vpsllq $32, %zmm3, %zmm3
; AVX512BW-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT:    vpsrlq $32, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    vpsllq $32, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq512:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq512:
; SKX:       ## BB#0:
; SKX-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %z = mul <8 x i64>%x, %y
  ret <8 x i64>%z
}

; v4i64 multiply: vpmullq on ymm needs AVX512DQ *and* AVX512VL, so only the
; SKX run line gets the single-instruction form.
define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
; AVX512F-LABEL: imulq256:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmuludq %ymm0, %ymm1, %ymm2
; AVX512F-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512F-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512F-NEXT:    vpsllq $32, %ymm3, %ymm3
; AVX512F-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX512F-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX512F-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq256:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmuludq %ymm0, %ymm1, %ymm2
; AVX512VL-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512VL-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512VL-NEXT:    vpsllq $32, %ymm3, %ymm3
; AVX512VL-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
; AVX512VL-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX512VL-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX512VL-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq256:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpmuludq %ymm0, %ymm1, %ymm2
; AVX512BW-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512BW-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512BW-NEXT:    vpsllq $32, %ymm3, %ymm3
; AVX512BW-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
; AVX512BW-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX512BW-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512BW-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX512BW-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq256:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpmuludq %ymm0, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512DQ-NEXT:    vpsllq $32, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddq %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsrlq $32, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512DQ-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq256:
; SKX:       ## BB#0:
; SKX-NEXT:    vpmullq %ymm0, %ymm1, %ymm0
; SKX-NEXT:    retq
  %z = mul <4 x i64>%x, %y
  ret <4 x i64>%z
}

define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
; AVX512F-LABEL: imulq128:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmuludq %xmm0, %xmm1, %xmm2
; AVX512F-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512F-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512F-NEXT:    vpsllq $32, %xmm3, %xmm3
; AVX512F-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
; AVX512F-NEXT:    vpsrlq $32, %xmm1, %xmm1
; AVX512F-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512F-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq128:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmuludq %xmm0, %xmm1, %xmm2
; AVX512VL-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512VL-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512VL-NEXT:    vpsllq $32, %xmm3, %xmm3
; AVX512VL-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
; AVX512VL-NEXT:    vpsrlq $32, %xmm1, %xmm1
; AVX512VL-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512VL-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq128:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpmuludq %xmm0, %xmm1, %xmm2
; AVX512BW-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512BW-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512BW-NEXT:    vpsllq $32, %xmm3, %xmm3
; AVX512BW-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT:    vpsrlq $32, %xmm1, %xmm1
; AVX512BW-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512BW-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq128:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpmuludq %xmm0, %xmm1, %xmm2
; AVX512DQ-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512DQ-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512DQ-NEXT:    vpsllq $32, %xmm3, %xmm3
; AVX512DQ-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
; AVX512DQ-NEXT:    vpsrlq $32, %xmm1, %xmm1
; AVX512DQ-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512DQ-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq128:
; SKX:       ## BB#0:
; SKX-NEXT:    vpmullq %xmm0, %xmm1, %xmm0
; SKX-NEXT:    retq
  %z = mul <2 x i64>%x, %y
  ret <2 x i64>%z
}

define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: mulpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}

define <8 x double> @mulpd512fold(<8 x double> %y) {
; CHECK-LABEL: mulpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}

define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: mulps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}

define <16 x float> @mulps512fold(<16 x float> %y) {
; CHECK-LABEL: mulps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}

define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: divpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}

define <8 x double> @divpd512fold(<8 x double> %y) {
; CHECK-LABEL: divpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}

define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: divps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}

define <16 x float> @divps512fold(<16 x float> %y) {
; CHECK-LABEL: divps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}

define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <8 x i64>, <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; CHECK-LABEL: vpaddq_broadcast2_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load i64, i64* %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}

define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <16 x i32>, <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; CHECK-LABEL: vpmulld_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}

declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
; CHECK-LABEL: sqrtC:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; CHECK-LABEL: sqrtD:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}

declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; CHECK-LABEL: sqrtE:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtpd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}

define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; CHECK-LABEL: fadd_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}

define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: addq_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: orq_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; CHECK-LABEL: andd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpandd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %a = load <16 x i32>, <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; CHECK-LABEL: andqbrst:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %a = load i64, i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64>%d
}

define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vaddps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vaddps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fadd <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmulps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vmulps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fmul <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vminps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vminps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <16 x float> %i, %j
  %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vminpd:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512F-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vminpd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpxord %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vminpd:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512BW-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vminpd:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vminpd:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %ymm4, %ymm4, %ymm4
; SKX-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x double> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
  ret <8 x double> %r
}

define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmaxps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vmaxps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <16 x float> %i, %j
  %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vmaxpd:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512F-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vmaxpd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpxord %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vmaxpd:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512BW-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vmaxpd:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vmaxpd:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %ymm4, %ymm4, %ymm4
; SKX-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x double> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
  ret <8 x double> %r
}

define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vsubps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vsubps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fsub <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vdivps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vdivps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fdiv <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqq %zmm4, %zmm3, %k1
; CHECK-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <8 x double> %j, <8 x i64> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
; CHECK-LABEL: test_maskz_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqq %zmm3, %zmm2, %k1
; CHECK-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                      <8 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_fold_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqq %zmm3, %zmm2, %k1
; CHECK-NEXT:    vaddpd (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                          <8 x double>* %j, <8 x i64> %mask1)
                                          nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
; CHECK-LABEL: test_maskz_fold_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1
; CHECK-NEXT:    vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                           <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
; CHECK-LABEL: test_broadcast_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  ret <8 x double> %x
}

define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_broadcast_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
; CHECK-NEXT:    vpcmpneqq %zmm0, %zmm2, %k1
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
                                               double* %j, <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
  ret <8 x double> %r
}

define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
; CHECK-LABEL: test_maskz_broadcast_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                                <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

; fneg as fsub from -0.0: lowered to an xor with the sign-bit mask; the DQ
; targets use the FP-domain vxorps, the others the integer-domain vpxord.
define <16 x float> @test_fxor(<16 x float> %a) {
; AVX512F-LABEL: test_fxor:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_fxor:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_fxor:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_fxor:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vxorps {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_fxor:
; SKX:       ## BB#0:
; SKX-NEXT:    vxorps {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT:    retq

  %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <16 x float>%res
}

define <8 x float> @test_fxor_8f32(<8 x float> %a) {
; CHECK-LABEL: test_fxor_8f32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <8 x float>%res
}

define <8 x double> @fabs_v8f64(<8 x double> %p)
; AVX512F-LABEL: fabs_v8f64:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: fabs_v8f64:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: fabs_v8f64:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: fabs_v8f64:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vandpd {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: fabs_v8f64:
; SKX:       ## BB#0:
; SKX-NEXT:    vandpd {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT:    retq
{
  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)

define <16 x float> @fabs_v16f32(<16 x float> %p)
; AVX512F-LABEL: fabs_v16f32:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: fabs_v16f32:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: fabs_v16f32:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: fabs_v16f32:
; AVX512DQ: 
## BB#0: 1013; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 1014; AVX512DQ-NEXT: retq 1015; 1016; SKX-LABEL: fabs_v16f32: 1017; SKX: ## BB#0: 1018; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 1019; SKX-NEXT: retq 1020{ 1021 %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p) 1022 ret <16 x float> %t 1023} 1024declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p) 1025