; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq | FileCheck --check-prefix=CHECK --check-prefix=AVX512DQ %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512dq -mattr=+avx512bw -mattr=+avx512vl| FileCheck --check-prefix=CHECK --check-prefix=SKX %s

define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}

define <8 x double> @addpd512fold(<8 x double> %y) {
; CHECK-LABEL: addpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}

define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: addps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}

define <16 x float> @addps512fold(<16 x float> %y) {
; CHECK-LABEL: addps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}

define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: subpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}

define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; CHECK-LABEL: subpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <8 x double>, <8 x double>* %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}

define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: subps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}

define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; CHECK-LABEL: subps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <16 x float>, <16 x float>* %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}

define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; AVX512F-LABEL: imulq512:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2
; AVX512F-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512F-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512F-NEXT:    vpsllq $32, %zmm3, %zmm3
; AVX512F-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
; AVX512F-NEXT:    vpsrlq $32, %zmm1, %zmm1
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    vpsllq $32, %zmm0, %zmm0
; AVX512F-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq512:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2
; AVX512VL-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512VL-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512VL-NEXT:    vpsllq $32, %zmm3, %zmm3
; AVX512VL-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
; AVX512VL-NEXT:    vpsrlq $32, %zmm1, %zmm1
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512VL-NEXT:    vpsllq $32, %zmm0, %zmm0
; AVX512VL-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq512:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2
; AVX512BW-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512BW-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512BW-NEXT:    vpsllq $32, %zmm3, %zmm3
; AVX512BW-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
; AVX512BW-NEXT:    vpsrlq $32, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    vpsllq $32, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq512:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq512:
; SKX:       ## BB#0:
; SKX-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %z = mul <8 x i64>%x, %y
  ret <8 x i64>%z
}

define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: mulpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}

define <8 x double> @mulpd512fold(<8 x double> %y) {
; CHECK-LABEL: mulpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}

define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: mulps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}

define <16 x float> @mulps512fold(<16 x float> %y) {
; CHECK-LABEL: mulps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}

define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: divpd512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}

define <8 x double> @divpd512fold(<8 x double> %y) {
; CHECK-LABEL: divpd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}

define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: divps512:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}

define <16 x float> @divps512fold(<16 x float> %y) {
; CHECK-LABEL: divps512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}

define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; CHECK-LABEL: vpaddq_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <8 x i64>, <8 x i64>* %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; CHECK-LABEL: vpaddq_broadcast2_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load i64, i64* %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}

define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; CHECK-LABEL: vpaddd_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <16 x i32>, <16 x i32>* %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_fold_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, <16 x i32>* %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; CHECK-LABEL: vpmulld_test:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}

declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
; CHECK-LABEL: sqrtC:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; CHECK-LABEL: sqrtD:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}

declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; CHECK-LABEL: sqrtE:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vsqrtpd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}

define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; CHECK-LABEL: fadd_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}

define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: addq_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: orq_broadcast:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; CHECK-LABEL: andd512fold:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpandd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %a = load <16 x i32>, <16 x i32>* %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; CHECK-LABEL: andqbrst:
; CHECK:       ## BB#0: ## %entry
; CHECK-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %a = load i64, i64* %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64>%d
}

define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vaddps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vaddps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fadd <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmulps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vmulps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fmul <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vminps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vminps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <16 x float> %i, %j
  %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vminpd:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vminpd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vminpd:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vminpd:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vminpd:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; SKX-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x double> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
  ret <8 x double> %r
}

define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmaxps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vmaxps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <16 x float> %i, %j
  %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vmaxpd:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vmaxpd:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512VL-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; AVX512VL-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vmaxpd:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vmaxpd:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vmaxpd:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; SKX-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
; SKX-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x double> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
  ret <8 x double> %r
}

define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vsubps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vsubps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fsub <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vdivps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
; CHECK-NEXT:    vdivps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fdiv <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm4, %zmm4, %zmm4
; CHECK-NEXT:    vpcmpneqq %zmm4, %zmm3, %k1
; CHECK-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <8 x double> %j, <8 x i64> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
; CHECK-LABEL: test_maskz_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqq %zmm3, %zmm2, %k1
; CHECK-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                      <8 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %x = fadd <8 x double> %i, %j
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_fold_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
; CHECK-NEXT:    vpcmpneqq %zmm3, %zmm2, %k1
; CHECK-NEXT:    vaddpd (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                          <8 x double>* %j, <8 x i64> %mask1)
                                          nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
  ret <8 x double> %r
}

define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
; CHECK-LABEL: test_maskz_fold_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1
; CHECK-NEXT:    vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                           <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load <8 x double>, <8 x double>* %j, align 8
  %x = fadd <8 x double> %i, %tmp
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
; CHECK-LABEL: test_broadcast_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  ret <8 x double> %x
}

define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
; CHECK-LABEL: test_mask_broadcast_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
; CHECK-NEXT:    vpcmpneqq %zmm0, %zmm2, %k1
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
                                               double* %j, <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
  ret <8 x double> %r
}

define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
; CHECK-LABEL: test_maskz_broadcast_vaddpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1
; CHECK-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
                                                <8 x i64> %mask1) nounwind {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <8 x double> undef, double %tmp, i32 0
  %c = shufflevector <8 x double> %b, <8 x double> undef,
                     <8 x i32> zeroinitializer
  %x = fadd <8 x double> %c, %i
  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
  ret <8 x double> %r
}

define <16 x float> @test_fxor(<16 x float> %a) {
; AVX512F-LABEL: test_fxor:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_fxor:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_fxor:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpxord {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_fxor:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vxorps {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_fxor:
; SKX:       ## BB#0:
; SKX-NEXT:    vxorps {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT:    retq

  %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <16 x float>%res
}

define <8 x float> @test_fxor_8f32(<8 x float> %a) {
; CHECK-LABEL: test_fxor_8f32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <8 x float>%res
}

define <8 x double> @fabs_v8f64(<8 x double> %p)
; AVX512F-LABEL: fabs_v8f64:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: fabs_v8f64:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: fabs_v8f64:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: fabs_v8f64:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vandpd {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: fabs_v8f64:
; SKX:       ## BB#0:
; SKX-NEXT:    vandpd {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT:    retq
{
  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)

define <16 x float> @fabs_v16f32(<16 x float> %p)
; AVX512F-LABEL: fabs_v16f32:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: fabs_v16f32:
; AVX512VL:       ## BB#0:
; AVX512VL-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: fabs_v16f32:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: fabs_v16f32:
; AVX512DQ:       ## BB#0:
; AVX512DQ-NEXT:    vandps {{.*}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: fabs_v16f32:
; SKX:       ## BB#0:
; SKX-NEXT:    vandps {{.*}}(%rip), %zmm0, %zmm0
; SKX-NEXT:    retq
{
  %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)