; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s

declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

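; This file exercises the 512-bit masked FMA intrinsics (vfmadd, vfmsub,
; vfnmadd, vfnmsub, vfmaddsub and vfmsubadd, in packed-single and packed-double
; form). Each intrinsic takes three vector operands, a mask (i16 for
; <16 x float>, i8 for <8 x double>; -1 means all lanes active) and a trailing
; i32 rounding-mode operand.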
define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_ps_z:
; CHECK: ## BB#0:
; CHECK-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_pd_z:
; CHECK: ## BB#0:
; CHECK-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmadd_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmsubps_z:
; CHECK: ## BB#0:
; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmsubpd_z:
; CHECK: ## BB#0:
; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfnmsub_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubps_z:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; CHECK-LABEL: test_mask_fmaddsub_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubpd_z:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmaddsub_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

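; The test_int_* functions below call each intrinsic twice - once masked with
; the default rounding mode and once unmasked with embedded {rn-sae} rounding -
; and add the two results, so both encodings appear in the checked assembly.
; The mask variants merge into the first source (213 form), the mask3 variants
; merge into the third source (231 form), and the maskz variants zero the
; inactive lanes ({z}).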
define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfmaddsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfmaddsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfmsubadd231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfmsubadd231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

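; The rounding-mode tests pass an explicit immediate as the last operand:
; 0 = round-to-nearest-even ({rn-sae}), 1 = round-down ({rd-sae}),
; 2 = round-up ({ru-sae}), 3 = round-toward-zero ({rz-sae}), and 4 = use the
; current rounding mode (no embedded rounding). The _rrb_ tests are masked;
; the _rrbz_ tests pass an all-ones mask.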
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current:
; CHECK: ## BB#0:
; CHECK-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfmadd231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfmadd231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

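; The same rounding-mode and masked/unmasked matrix is repeated below for the
; vfnmsub packed-double form, followed by the remaining vfnmsub and vfnmadd
; intrinsic tests.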
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne:
; CHECK: ## BB#0:
; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn:
; CHECK: ## BB#0:
; CHECK-NEXT: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp:
; CHECK: ## BB#0:
; CHECK-NEXT: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz:
; CHECK: ## BB#0:
; CHECK-NEXT: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current:
; CHECK: ## BB#0:
; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfnmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm2, %zmm3
; CHECK-NEXT: vfnmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm3
; CHECK-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT: vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}