; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s

; Tests for the 512-bit masked FMA intrinsics.
;
; Conventions used by the tests below:
;   * "_z" tests pass an all-ones mask (i16 -1 / i8 -1), i.e. the unmasked form.
;   * "test_mask_*" tests pass the mask as a function argument and therefore
;     must produce an instruction carrying a {%k1} write-mask.
;   * Every call in this group passes i32 4 as the rounding operand.

declare <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

; --- vfnmadd, packed single ---------------------------------------------------

define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_x86_vfnmadd_ps_z:
  ; CHECK: vfnmadd213ps %zmm
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; The write-mask is checked as well, so the test fails if masking is dropped.
  ; CHECK-LABEL: test_mask_vfnmadd_ps:
  ; CHECK: vfnmadd213ps %zmm{{.*}} {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

; --- vfnmadd, packed double ---------------------------------------------------

define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_x86_vfnmadd_pd_z:
  ; CHECK: vfnmadd213pd %zmm
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_vfnmadd_pd:
  ; CHECK: vfnmadd213pd %zmm{{.*}} {%k1}
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

; --- vfnmsub, packed single ---------------------------------------------------

define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_x86_vfnmsubps_z:
  ; CHECK: vfnmsub213ps %zmm
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_vfnmsub_ps:
  ; CHECK: vfnmsub213ps %zmm{{.*}} {%k1}
  %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

; --- vfnmsub, packed double ---------------------------------------------------

define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_x86_vfnmsubpd_z:
  ; CHECK: vfnmsub213pd %zmm
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_vfnmsub_pd:
  ; CHECK: vfnmsub213pd %zmm{{.*}} {%k1}
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

; --- vfmaddsub, packed single (unmasked) --------------------------------------

define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_x86_vfmaddsubps_z:
  ; CHECK: vfmaddsub213ps %zmm
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; CHECK-LABEL: test_mask_fmaddsub_ps:
; CHECK: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa6,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone

define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_x86_vfmaddsubpd_z:
  ; CHECK: vfmaddsub213pd %zmm
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone

define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; The write-mask is checked as well, so the test fails if masking is dropped.
  ; CHECK-LABEL: test_mask_vfmaddsub_pd:
  ; CHECK: vfmaddsub213pd %zmm{{.*}} {%k1}
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

; The test_int_* functions below each issue two calls to the same intrinsic:
; one with the caller-supplied mask and rounding operand 4, and one with an
; all-ones mask and rounding operand 0 (checked as {rn-sae}); the two results
; are added so that both instructions stay live in the output.

define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmaddsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmaddsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsubadd231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsubadd231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

; Rounding-operand tests for vfmadd (packed single). The last i32 operand takes
; the values 0..4; the CHECK lines pin the expected assembler spelling and MC
; encoding for each: 0 -> {rn-sae}, 1 -> {rd-sae}, 2 -> {ru-sae}, 3 -> {rz-sae},
; 4 -> no rounding specifier. "rrb" tests use a register mask, "rrbz" tests an
; all-ones mask.

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
  ; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
  ; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
  ; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
  ; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
  ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne:
  ; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn:
  ; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp:
  ; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz:
  ; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current:
  ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmsub231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

; Rounding-operand tests for vfmadd (packed double); same operand-to-specifier
; mapping as checked for the packed-single tests.

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
  ; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
  ; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
  ; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
  ; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
  ; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne:
  ; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn:
  ; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp:
  ; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz:
  ; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current:
  ; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmadd231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.maskz.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfmadd231ps %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm3 {%k1} {z}
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.maskz.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}


; Rounding-operand tests for vfnmsub (packed double); same layout as the
; vfmadd rounding tests, with the vfnmsub213pd opcode byte (0xae) in the
; expected encodings.

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
  ; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
  ; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
  ; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
  ; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
  ; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne:
  ; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn:
  ; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp:
  ; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz:
  ; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current:
  ; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm3
; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm3 {%k1}
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovaps %zmm2, %zmm3
; CHECK-NEXT:    vfnmsub231pd %zmm1, %zmm0, %zmm3 {%k1}
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x
float> %x2, i16 %x3){ 615; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512: 616; CHECK: ## BB#0: 617; CHECK-NEXT: kmovw %edi, %k1 618; CHECK-NEXT: vmovaps %zmm0, %zmm3 619; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm3 {%k1} 620; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 621; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 622; CHECK-NEXT: retq 623 %res = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) 624 %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) 625 %res2 = fadd <16 x float> %res, %res1 626 ret <16 x float> %res2 627} 628 629declare <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) 630 631define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){ 632; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512: 633; CHECK: ## BB#0: 634; CHECK-NEXT: kmovw %edi, %k1 635; CHECK-NEXT: vmovaps %zmm2, %zmm3 636; CHECK-NEXT: vfnmsub231ps %zmm1, %zmm0, %zmm3 {%k1} 637; CHECK-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 638; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 639; CHECK-NEXT: retq 640 %res = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) 641 %res1 = call <16 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) 642 %res2 = fadd <16 x float> %res, %res1 643 ret <16 x float> %res2 644} 645 646define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){ 647; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512: 648; CHECK: ## BB#0: 649; CHECK-NEXT: movzbl %dil, %eax 650; CHECK-NEXT: kmovw %eax, %k1 651; CHECK-NEXT: vmovaps %zmm0, %zmm3 652; CHECK-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm3 
{%k1} 653; CHECK-NEXT: vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 654; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 655; CHECK-NEXT: retq 656 %res = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 4) 657 %res1 = call <8 x double> @llvm.x86.avx512.mask.vfnmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) 658 %res2 = fadd <8 x double> %res, %res1 659 ret <8 x double> %res2 660} 661 662define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){ 663; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512: 664; CHECK: ## BB#0: 665; CHECK-NEXT: kmovw %edi, %k1 666; CHECK-NEXT: vmovaps %zmm0, %zmm3 667; CHECK-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm3 {%k1} 668; CHECK-NEXT: vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 669; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 670; CHECK-NEXT: retq 671 %res = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 4) 672 %res1 = call <16 x float> @llvm.x86.avx512.mask.vfnmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) 673 %res2 = fadd <16 x float> %res, %res1 674 ret <16 x float> %res2 675} 676