• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
4
5declare <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i32)
6declare <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i32)
7
8define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
9; CHECK-LABEL: test_x86_vfnmadd_ps_z:
10; CHECK:       # %bb.0:
11; CHECK-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xac,0xc2]
12; CHECK-NEXT:    # zmm0 = -(zmm1 * zmm0) + zmm2
13; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
14  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
15  %2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a0, <16 x float> %1, <16 x float> %a2)
16  ret <16 x float> %2
17}
18
19define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
20; X86-LABEL: test_mask_vfnmadd_ps:
21; X86:       # %bb.0:
22; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
23; X86-NEXT:    vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
24; X86-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
25; X86-NEXT:    retl # encoding: [0xc3]
26;
27; X64-LABEL: test_mask_vfnmadd_ps:
28; X64:       # %bb.0:
29; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
30; X64-NEXT:    vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
31; X64-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
32; X64-NEXT:    retq # encoding: [0xc3]
33  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
34  %2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a0, <16 x float> %1, <16 x float> %a2)
35  %3 = bitcast i16 %mask to <16 x i1>
36  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %a0
37  ret <16 x float> %4
38}
39
40define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
41; CHECK-LABEL: test_x86_vfnmadd_pd_z:
42; CHECK:       # %bb.0:
43; CHECK-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xac,0xc2]
44; CHECK-NEXT:    # zmm0 = -(zmm1 * zmm0) + zmm2
45; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
46  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
47  %2 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %a2)
48  ret <8 x double> %2
49}
50
51define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
52; X86-LABEL: test_mask_vfnmadd_pd:
53; X86:       # %bb.0:
54; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
55; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
56; X86-NEXT:    vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
57; X86-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
58; X86-NEXT:    retl # encoding: [0xc3]
59;
60; X64-LABEL: test_mask_vfnmadd_pd:
61; X64:       # %bb.0:
62; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
63; X64-NEXT:    vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
64; X64-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
65; X64-NEXT:    retq # encoding: [0xc3]
66  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
67  %2 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %a2)
68  %3 = bitcast i8 %mask to <8 x i1>
69  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %a0
70  ret <8 x double> %4
71}
72
73define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
74; CHECK-LABEL: test_x86_vfnmsubps_z:
75; CHECK:       # %bb.0:
76; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xae,0xc2]
77; CHECK-NEXT:    # zmm0 = -(zmm1 * zmm0) - zmm2
78; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
79  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
80  %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
81  %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a0, <16 x float> %1, <16 x float> %2)
82  ret <16 x float> %3
83}
84
85define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
86; X86-LABEL: test_mask_vfnmsub_ps:
87; X86:       # %bb.0:
88; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
89; X86-NEXT:    vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
90; X86-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
91; X86-NEXT:    retl # encoding: [0xc3]
92;
93; X64-LABEL: test_mask_vfnmsub_ps:
94; X64:       # %bb.0:
95; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
96; X64-NEXT:    vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
97; X64-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
98; X64-NEXT:    retq # encoding: [0xc3]
99  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
100  %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
101  %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a0, <16 x float> %1, <16 x float> %2)
102  %4 = bitcast i16 %mask to <16 x i1>
103  %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %a0
104  ret <16 x float> %5
105}
106
107define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
108; CHECK-LABEL: test_x86_vfnmsubpd_z:
109; CHECK:       # %bb.0:
110; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
111; CHECK-NEXT:    # zmm0 = -(zmm1 * zmm0) - zmm2
112; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
113  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
114  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
115  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %2)
116  ret <8 x double> %3
117}
118
119define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
120; X86-LABEL: test_mask_vfnmsub_pd:
121; X86:       # %bb.0:
122; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
123; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
124; X86-NEXT:    vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
125; X86-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
126; X86-NEXT:    retl # encoding: [0xc3]
127;
128; X64-LABEL: test_mask_vfnmsub_pd:
129; X64:       # %bb.0:
130; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
131; X64-NEXT:    vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
132; X64-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
133; X64-NEXT:    retq # encoding: [0xc3]
134  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
135  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
136  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %2)
137  %4 = bitcast i8 %mask to <8 x i1>
138  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
139  ret <8 x double> %5
140}
141
142define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
143; CHECK-LABEL: test_x86_vfmaddsubps_z:
144; CHECK:       # %bb.0:
145; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xa6,0xc2]
146; CHECK-NEXT:    # zmm0 = (zmm1 * zmm0) +/- zmm2
147; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
148  %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 4) #2
149  ret <16 x float> %res
150}
151
152define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
153; X86-LABEL: test_mask_fmaddsub_ps:
154; X86:       # %bb.0:
155; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
156; X86-NEXT:    vfmaddsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x96,0xc1]
157; X86-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
158; X86-NEXT:    retl # encoding: [0xc3]
159;
160; X64-LABEL: test_mask_fmaddsub_ps:
161; X64:       # %bb.0:
162; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
163; X64-NEXT:    vfmaddsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x96,0xc1]
164; X64-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
165; X64-NEXT:    retq # encoding: [0xc3]
166  %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i32 4)
167  %bc = bitcast i16 %mask to <16 x i1>
168  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a
169  ret <16 x float> %sel
170}
171
172declare <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i32) nounwind readnone
173
174define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
175; CHECK-LABEL: test_x86_vfmaddsubpd_z:
176; CHECK:       # %bb.0:
177; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xa6,0xc2]
178; CHECK-NEXT:    # zmm0 = (zmm1 * zmm0) +/- zmm2
179; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
180  %res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 4) #2
181  ret <8 x double> %res
182}
183declare <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i32) nounwind readnone
184
185define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
186; X86-LABEL: test_mask_vfmaddsub_pd:
187; X86:       # %bb.0:
188; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
189; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
190; X86-NEXT:    vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
191; X86-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
192; X86-NEXT:    retl # encoding: [0xc3]
193;
194; X64-LABEL: test_mask_vfmaddsub_pd:
195; X64:       # %bb.0:
196; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
197; X64-NEXT:    vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
198; X64-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
199; X64-NEXT:    retq # encoding: [0xc3]
200  %res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 4) #2
201  %bc = bitcast i8 %mask to <8 x i1>
202  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
203  ret <8 x double> %sel
204}
205
206define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
207; X86-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
208; X86:       # %bb.0:
209; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
210; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
211; X86-NEXT:    vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
212; X86-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
213; X86-NEXT:    retl # encoding: [0xc3]
214;
215; X64-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
216; X64:       # %bb.0:
217; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
218; X64-NEXT:    vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
219; X64-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
220; X64-NEXT:    retq # encoding: [0xc3]
221  %res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i32 4)
222  %bc = bitcast i8 %x3 to <8 x i1>
223  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %x0
224  ret <8 x double> %sel
225}
226
227define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
228; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
229; X86:       # %bb.0:
230; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
231; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
232; X86-NEXT:    vfmaddsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb6,0xd1]
233; X86-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
234; X86-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
235; X86-NEXT:    retl # encoding: [0xc3]
236;
237; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
238; X64:       # %bb.0:
239; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
240; X64-NEXT:    vfmaddsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb6,0xd1]
241; X64-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
242; X64-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
243; X64-NEXT:    retq # encoding: [0xc3]
244  %1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
245  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
246  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %2)
247  %4 = shufflevector <8 x double> %3, <8 x double> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
248  %5 = bitcast i8 %x3 to <8 x i1>
249  %6 = select <8 x i1> %5, <8 x double> %4, <8 x double> %x2
250  ret <8 x double> %6
251}
252
253define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
254; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
255; X86:       # %bb.0:
256; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
257; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
258; X86-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xa6,0xc2]
259; X86-NEXT:    # zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
260; X86-NEXT:    retl # encoding: [0xc3]
261;
262; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
263; X64:       # %bb.0:
264; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
265; X64-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xa6,0xc2]
266; X64-NEXT:    # zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
267; X64-NEXT:    retq # encoding: [0xc3]
268  %1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
269  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
270  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %2)
271  %4 = shufflevector <8 x double> %3, <8 x double> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
272  %5 = bitcast i8 %x3 to <8 x i1>
273  %6 = select <8 x i1> %5, <8 x double> %4, <8 x double> zeroinitializer
274  ret <8 x double> %6
275}
276
277define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
278; X86-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
279; X86:       # %bb.0:
280; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
281; X86-NEXT:    vfmaddsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x96,0xc1]
282; X86-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
283; X86-NEXT:    retl # encoding: [0xc3]
284;
285; X64-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
286; X64:       # %bb.0:
287; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
288; X64-NEXT:    vfmaddsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x96,0xc1]
289; X64-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
290; X64-NEXT:    retq # encoding: [0xc3]
291  %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i32 4)
292  %bc = bitcast i16 %x3 to <16 x i1>
293  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %x0
294  ret <16 x float> %sel
295}
296
297define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
298; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
299; X86:       # %bb.0:
300; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
301; X86-NEXT:    vfmaddsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb6,0xd1]
302; X86-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
303; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
304; X86-NEXT:    retl # encoding: [0xc3]
305;
306; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
307; X64:       # %bb.0:
308; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
309; X64-NEXT:    vfmaddsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb6,0xd1]
310; X64-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
311; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
312; X64-NEXT:    retq # encoding: [0xc3]
313  %1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
314  %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
315  %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %2)
316  %4 = shufflevector <16 x float> %3, <16 x float> %1, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
317  %5 = bitcast i16 %x3 to <16 x i1>
318  %6 = select <16 x i1> %5, <16 x float> %4, <16 x float> %x2
319  ret <16 x float> %6
320}
321
322define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
323; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
324; X86:       # %bb.0:
325; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
326; X86-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xa6,0xc2]
327; X86-NEXT:    # zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
328; X86-NEXT:    retl # encoding: [0xc3]
329;
330; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
331; X64:       # %bb.0:
332; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
333; X64-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xa6,0xc2]
334; X64-NEXT:    # zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
335; X64-NEXT:    retq # encoding: [0xc3]
336  %1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
337  %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
338  %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %2)
339  %4 = shufflevector <16 x float> %3, <16 x float> %1, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
340  %5 = bitcast i16 %x3 to <16 x i1>
341  %6 = select <16 x i1> %5, <16 x float> %4, <16 x float> zeroinitializer
342  ret <16 x float> %6
343}
344
345define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
346; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
347; X86:       # %bb.0:
348; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
349; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
350; X86-NEXT:    vfmsubadd231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb7,0xd1]
351; X86-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
352; X86-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
353; X86-NEXT:    retl # encoding: [0xc3]
354;
355; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
356; X64:       # %bb.0:
357; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
358; X64-NEXT:    vfmsubadd231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb7,0xd1]
359; X64-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
360; X64-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
361; X64-NEXT:    retq # encoding: [0xc3]
362  %1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
363  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
364  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %2)
365  %4 = shufflevector <8 x double> %1, <8 x double> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
366  %5 = bitcast i8 %x3 to <8 x i1>
367  %6 = select <8 x i1> %5, <8 x double> %4, <8 x double> %x2
368  ret <8 x double> %6
369}
370
371define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
372; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
373; X86:       # %bb.0:
374; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
375; X86-NEXT:    vfmsubadd231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb7,0xd1]
376; X86-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
377; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
378; X86-NEXT:    retl # encoding: [0xc3]
379;
380; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
381; X64:       # %bb.0:
382; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
383; X64-NEXT:    vfmsubadd231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb7,0xd1]
384; X64-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
385; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
386; X64-NEXT:    retq # encoding: [0xc3]
387  %1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
388  %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
389  %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %2)
390  %4 = shufflevector <16 x float> %1, <16 x float> %3, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
391  %5 = bitcast i16 %x3 to <16 x i1>
392  %6 = select <16 x i1> %5, <16 x float> %4, <16 x float> %x2
393  ret <16 x float> %6
394}
395
396define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
397; X86-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
398; X86:       # %bb.0:
399; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
400; X86-NEXT:    vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x19,0x98,0xc1]
401; X86-NEXT:    retl # encoding: [0xc3]
402;
403; X64-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
404; X64:       # %bb.0:
405; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
406; X64-NEXT:    vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x19,0x98,0xc1]
407; X64-NEXT:    retq # encoding: [0xc3]
408  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 8) nounwind
409  %bc = bitcast i16 %mask to <16 x i1>
410  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
411  ret <16 x float> %sel
412}
413
414define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
415; X86-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
416; X86:       # %bb.0:
417; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
418; X86-NEXT:    vfmadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x39,0x98,0xc1]
419; X86-NEXT:    retl # encoding: [0xc3]
420;
421; X64-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
422; X64:       # %bb.0:
423; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
424; X64-NEXT:    vfmadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x39,0x98,0xc1]
425; X64-NEXT:    retq # encoding: [0xc3]
426  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 9) nounwind
427  %bc = bitcast i16 %mask to <16 x i1>
428  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
429  ret <16 x float> %sel
430}
431
432define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
433; X86-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
434; X86:       # %bb.0:
435; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
436; X86-NEXT:    vfmadd132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x59,0x98,0xc1]
437; X86-NEXT:    retl # encoding: [0xc3]
438;
439; X64-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
440; X64:       # %bb.0:
441; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
442; X64-NEXT:    vfmadd132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x59,0x98,0xc1]
443; X64-NEXT:    retq # encoding: [0xc3]
444  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 10) nounwind
445  %bc = bitcast i16 %mask to <16 x i1>
446  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
447  ret <16 x float> %sel
448}
449
450define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
451; X86-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
452; X86:       # %bb.0:
453; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
454; X86-NEXT:    vfmadd132ps {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x79,0x98,0xc1]
455; X86-NEXT:    retl # encoding: [0xc3]
456;
457; X64-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
458; X64:       # %bb.0:
459; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
460; X64-NEXT:    vfmadd132ps {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x79,0x98,0xc1]
461; X64-NEXT:    retq # encoding: [0xc3]
462  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 11) nounwind
463  %bc = bitcast i16 %mask to <16 x i1>
464  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
465  ret <16 x float> %sel
466}
467
468define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
469; X86-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
470; X86:       # %bb.0:
471; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
472; X86-NEXT:    vfmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x98,0xc1]
473; X86-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
474; X86-NEXT:    retl # encoding: [0xc3]
475;
476; X64-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
477; X64:       # %bb.0:
478; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
479; X64-NEXT:    vfmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x98,0xc1]
480; X64-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
481; X64-NEXT:    retq # encoding: [0xc3]
482  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 4) nounwind
483  %bc = bitcast i16 %mask to <16 x i1>
484  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
485  ret <16 x float> %sel
486}
487
488define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
489; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne:
490; CHECK:       # %bb.0:
491; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
492; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
493  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 8) nounwind
494  ret <16 x float> %res
495}
496
497define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
498; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn:
499; CHECK:       # %bb.0:
500; CHECK-NEXT:    vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x38,0xa8,0xc2]
501; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
502  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 9) nounwind
503  ret <16 x float> %res
504}
505
506define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
507; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp:
508; CHECK:       # %bb.0:
509; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x58,0xa8,0xc2]
510; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
511  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 10) nounwind
512  ret <16 x float> %res
513}
514
515define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
516; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz:
517; CHECK:       # %bb.0:
518; CHECK-NEXT:    vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x78,0xa8,0xc2]
519; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
520  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 11) nounwind
521  ret <16 x float> %res
522}
523
524define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
525; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current:
526; CHECK:       # %bb.0:
527; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
528; CHECK-NEXT:    # zmm0 = (zmm1 * zmm0) + zmm2
529; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
530  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 4) nounwind
531  ret <16 x float> %res
532}
533
534define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
535; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
536; X86:       # %bb.0:
537; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
538; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
539; X86-NEXT:    vfmsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xba,0xd1]
540; X86-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) - zmm2
541; X86-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
542; X86-NEXT:    retl # encoding: [0xc3]
543;
544; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
545; X64:       # %bb.0:
546; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
547; X64-NEXT:    vfmsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xba,0xd1]
548; X64-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) - zmm2
549; X64-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
550; X64-NEXT:    retq # encoding: [0xc3]
551  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
552  %2 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %1)
553  %3 = bitcast i8 %x3 to <8 x i1>
554  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %x2
555  ret <8 x double> %4
556}
557
558define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
559; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
560; X86:       # %bb.0:
561; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
562; X86-NEXT:    vfmsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xba,0xd1]
563; X86-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) - zmm2
564; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
565; X86-NEXT:    retl # encoding: [0xc3]
566;
567; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
568; X64:       # %bb.0:
569; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
570; X64-NEXT:    vfmsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xba,0xd1]
571; X64-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) - zmm2
572; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
573; X64-NEXT:    retq # encoding: [0xc3]
574  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
575  %2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %1)
576  %3 = bitcast i16 %x3 to <16 x i1>
577  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %x2
578  ret <16 x float> %4
579}
580
581define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
582; X86-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
583; X86:       # %bb.0:
584; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
585; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
586; X86-NEXT:    vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x19,0x98,0xc1]
587; X86-NEXT:    retl # encoding: [0xc3]
588;
589; X64-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
590; X64:       # %bb.0:
591; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
592; X64-NEXT:    vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x19,0x98,0xc1]
593; X64-NEXT:    retq # encoding: [0xc3]
594  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 8) nounwind
595  %bc = bitcast i8 %mask to <8 x i1>
596  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
597  ret <8 x double> %sel
598}
599
600define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
601; X86-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
602; X86:       # %bb.0:
603; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
604; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
605; X86-NEXT:    vfmadd132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x39,0x98,0xc1]
606; X86-NEXT:    retl # encoding: [0xc3]
607;
608; X64-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
609; X64:       # %bb.0:
610; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
611; X64-NEXT:    vfmadd132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x39,0x98,0xc1]
612; X64-NEXT:    retq # encoding: [0xc3]
613  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 9) nounwind
614  %bc = bitcast i8 %mask to <8 x i1>
615  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
616  ret <8 x double> %sel
617}
618
619define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
620; X86-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
621; X86:       # %bb.0:
622; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
623; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
624; X86-NEXT:    vfmadd132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x59,0x98,0xc1]
625; X86-NEXT:    retl # encoding: [0xc3]
626;
627; X64-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
628; X64:       # %bb.0:
629; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
630; X64-NEXT:    vfmadd132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x59,0x98,0xc1]
631; X64-NEXT:    retq # encoding: [0xc3]
632  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 10) nounwind
633  %bc = bitcast i8 %mask to <8 x i1>
634  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
635  ret <8 x double> %sel
636}
637
638define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
639; X86-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
640; X86:       # %bb.0:
641; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
642; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
643; X86-NEXT:    vfmadd132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x79,0x98,0xc1]
644; X86-NEXT:    retl # encoding: [0xc3]
645;
646; X64-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
647; X64:       # %bb.0:
648; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
649; X64-NEXT:    vfmadd132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x79,0x98,0xc1]
650; X64-NEXT:    retq # encoding: [0xc3]
651  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 11) nounwind
652  %bc = bitcast i8 %mask to <8 x i1>
653  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
654  ret <8 x double> %sel
655}
656
657define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
658; X86-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
659; X86:       # %bb.0:
660; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
661; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
662; X86-NEXT:    vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x98,0xc1]
663; X86-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
664; X86-NEXT:    retl # encoding: [0xc3]
665;
666; X64-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
667; X64:       # %bb.0:
668; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
669; X64-NEXT:    vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x98,0xc1]
670; X64-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
671; X64-NEXT:    retq # encoding: [0xc3]
672  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 4) nounwind
673  %bc = bitcast i8 %mask to <8 x i1>
674  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
675  ret <8 x double> %sel
676}
677
678define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
679; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne:
680; CHECK:       # %bb.0:
681; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
682; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
683  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 8) nounwind
684  ret <8 x double> %res
685}
686
687define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
688; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn:
689; CHECK:       # %bb.0:
690; CHECK-NEXT:    vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x38,0xa8,0xc2]
691; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
692  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 9) nounwind
693  ret <8 x double> %res
694}
695
696define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
697; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp:
698; CHECK:       # %bb.0:
699; CHECK-NEXT:    vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xa8,0xc2]
700; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
701  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 10) nounwind
702  ret <8 x double> %res
703}
704
705define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
706; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz:
707; CHECK:       # %bb.0:
708; CHECK-NEXT:    vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x78,0xa8,0xc2]
709; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
710  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 11) nounwind
711  ret <8 x double> %res
712}
713
714define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
715; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current:
716; CHECK:       # %bb.0:
717; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
718; CHECK-NEXT:    # zmm0 = (zmm1 * zmm0) + zmm2
719; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
720  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 4) nounwind
721  ret <8 x double> %res
722}
723
724define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
725; X86-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
726; X86:       # %bb.0:
727; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
728; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
729; X86-NEXT:    vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x98,0xc1]
730; X86-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
731; X86-NEXT:    retl # encoding: [0xc3]
732;
733; X64-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
734; X64:       # %bb.0:
735; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
736; X64-NEXT:    vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x98,0xc1]
737; X64-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
738; X64-NEXT:    retq # encoding: [0xc3]
739  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i32 4)
740  %bc = bitcast i8 %x3 to <8 x i1>
741  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %x0
742  ret <8 x double> %sel
743}
744
745define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
746; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
747; X86:       # %bb.0:
748; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
749; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
750; X86-NEXT:    vfmadd231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb8,0xd1]
751; X86-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) + zmm2
752; X86-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
753; X86-NEXT:    retl # encoding: [0xc3]
754;
755; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
756; X64:       # %bb.0:
757; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
758; X64-NEXT:    vfmadd231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb8,0xd1]
759; X64-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) + zmm2
760; X64-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
761; X64-NEXT:    retq # encoding: [0xc3]
762  %1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
763  %2 = bitcast i8 %x3 to <8 x i1>
764  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %x2
765  ret <8 x double> %3
766}
767
768define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
769; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
770; X86:       # %bb.0:
771; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
772; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
773; X86-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xa8,0xc2]
774; X86-NEXT:    # zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
775; X86-NEXT:    retl # encoding: [0xc3]
776;
777; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
778; X64:       # %bb.0:
779; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
780; X64-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xa8,0xc2]
781; X64-NEXT:    # zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
782; X64-NEXT:    retq # encoding: [0xc3]
783  %1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
784  %2 = bitcast i8 %x3 to <8 x i1>
785  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
786  ret <8 x double> %3
787}
788
789define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
790; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
791; X86:       # %bb.0:
792; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
793; X86-NEXT:    vfmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x98,0xc1]
794; X86-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
795; X86-NEXT:    retl # encoding: [0xc3]
796;
797; X64-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
798; X64:       # %bb.0:
799; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
800; X64-NEXT:    vfmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x98,0xc1]
801; X64-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
802; X64-NEXT:    retq # encoding: [0xc3]
803  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i32 4)
804  %bc = bitcast i16 %x3 to <16 x i1>
805  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %x0
806  ret <16 x float> %sel
807}
808
809define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
810; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
811; X86:       # %bb.0:
812; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
813; X86-NEXT:    vfmadd231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb8,0xd1]
814; X86-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) + zmm2
815; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
816; X86-NEXT:    retl # encoding: [0xc3]
817;
818; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
819; X64:       # %bb.0:
820; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
821; X64-NEXT:    vfmadd231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb8,0xd1]
822; X64-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) + zmm2
823; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
824; X64-NEXT:    retq # encoding: [0xc3]
825  %1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
826  %2 = bitcast i16 %x3 to <16 x i1>
827  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %x2
828  ret <16 x float> %3
829}
830
831define <16 x float> @test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
832; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
833; X86:       # %bb.0:
834; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
835; X86-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xa8,0xc2]
836; X86-NEXT:    # zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
837; X86-NEXT:    retl # encoding: [0xc3]
838;
839; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
840; X64:       # %bb.0:
841; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
842; X64-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xa8,0xc2]
843; X64-NEXT:    # zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
844; X64-NEXT:    retq # encoding: [0xc3]
845  %1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
846  %2 = bitcast i16 %x3 to <16 x i1>
847  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
848  ret <16 x float> %3
849}
850
851define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
852; X86-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
853; X86:       # %bb.0:
854; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
855; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
856; X86-NEXT:    vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x19,0x9e,0xc1]
857; X86-NEXT:    retl # encoding: [0xc3]
858;
859; X64-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
860; X64:       # %bb.0:
861; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
862; X64-NEXT:    vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x19,0x9e,0xc1]
863; X64-NEXT:    retq # encoding: [0xc3]
864  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
865  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
866  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 8)
867  %4 = bitcast i8 %mask to <8 x i1>
868  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
869  ret <8 x double> %5
870}
871
872define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
873; X86-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
874; X86:       # %bb.0:
875; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
876; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
877; X86-NEXT:    vfnmsub132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x39,0x9e,0xc1]
878; X86-NEXT:    retl # encoding: [0xc3]
879;
880; X64-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
881; X64:       # %bb.0:
882; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
883; X64-NEXT:    vfnmsub132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x39,0x9e,0xc1]
884; X64-NEXT:    retq # encoding: [0xc3]
885  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
886  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
887  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 9)
888  %4 = bitcast i8 %mask to <8 x i1>
889  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
890  ret <8 x double> %5
891}
892
893define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
894; X86-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
895; X86:       # %bb.0:
896; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
897; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
898; X86-NEXT:    vfnmsub132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x59,0x9e,0xc1]
899; X86-NEXT:    retl # encoding: [0xc3]
900;
901; X64-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
902; X64:       # %bb.0:
903; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
904; X64-NEXT:    vfnmsub132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x59,0x9e,0xc1]
905; X64-NEXT:    retq # encoding: [0xc3]
906  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
907  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
908  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 10)
909  %4 = bitcast i8 %mask to <8 x i1>
910  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
911  ret <8 x double> %5
912}
913
914define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
915; X86-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
916; X86:       # %bb.0:
917; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
918; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
919; X86-NEXT:    vfnmsub132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x79,0x9e,0xc1]
920; X86-NEXT:    retl # encoding: [0xc3]
921;
922; X64-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
923; X64:       # %bb.0:
924; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
925; X64-NEXT:    vfnmsub132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x79,0x9e,0xc1]
926; X64-NEXT:    retq # encoding: [0xc3]
927  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
928  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
929  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 11)
930  %4 = bitcast i8 %mask to <8 x i1>
931  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
932  ret <8 x double> %5
933}
934
935define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
936; X86-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
937; X86:       # %bb.0:
938; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
939; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
940; X86-NEXT:    vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
941; X86-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
942; X86-NEXT:    retl # encoding: [0xc3]
943;
944; X64-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
945; X64:       # %bb.0:
946; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
947; X64-NEXT:    vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
948; X64-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
949; X64-NEXT:    retq # encoding: [0xc3]
950  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
951  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
952  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %2)
953  %4 = bitcast i8 %mask to <8 x i1>
954  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
955  ret <8 x double> %5
956}
957
958define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
959; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne:
960; CHECK:       # %bb.0:
961; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
962; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
963  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
964  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
965  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 8)
966  ret <8 x double> %3
967}
968
969define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
970; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn:
971; CHECK:       # %bb.0:
972; CHECK-NEXT:    vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x38,0xae,0xc2]
973; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
974  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
975  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
976  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 9)
977  ret <8 x double> %3
978}
979
980define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
981; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp:
982; CHECK:       # %bb.0:
983; CHECK-NEXT:    vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xae,0xc2]
984; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
985  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
986  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
987  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 10)
988  ret <8 x double> %3
989}
990
991define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
992; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz:
993; CHECK:       # %bb.0:
994; CHECK-NEXT:    vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x78,0xae,0xc2]
995; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
996  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
997  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
998  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 11)
999  ret <8 x double> %3
1000}
1001
1002define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
1003; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current:
1004; CHECK:       # %bb.0:
1005; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
1006; CHECK-NEXT:    # zmm0 = -(zmm1 * zmm0) - zmm2
1007; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
1008  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
1009  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
1010  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %2)
1011  ret <8 x double> %3
1012}
1013
1014define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
1015; X86-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
1016; X86:       # %bb.0:
1017; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1018; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1019; X86-NEXT:    vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
1020; X86-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
1021; X86-NEXT:    retl # encoding: [0xc3]
1022;
1023; X64-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
1024; X64:       # %bb.0:
1025; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1026; X64-NEXT:    vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
1027; X64-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
1028; X64-NEXT:    retq # encoding: [0xc3]
1029  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x1
1030  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
1031  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %1, <8 x double> %2)
1032  %4 = bitcast i8 %x3 to <8 x i1>
1033  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %x0
1034  ret <8 x double> %5
1035}
1036
1037define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
1038; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
1039; X86:       # %bb.0:
1040; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1041; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1042; X86-NEXT:    vfnmsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xbe,0xd1]
1043; X86-NEXT:    # zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
1044; X86-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
1045; X86-NEXT:    retl # encoding: [0xc3]
1046;
1047; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
1048; X64:       # %bb.0:
1049; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1050; X64-NEXT:    vfnmsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xbe,0xd1]
1051; X64-NEXT:    # zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
1052; X64-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
1053; X64-NEXT:    retq # encoding: [0xc3]
1054  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x0
1055  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
1056  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %1, <8 x double> %x1, <8 x double> %2)
1057  %4 = bitcast i8 %x3 to <8 x i1>
1058  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %x2
1059  ret <8 x double> %5
1060}
1061
1062define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
1063; X86-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
1064; X86:       # %bb.0:
1065; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1066; X86-NEXT:    vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
1067; X86-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
1068; X86-NEXT:    retl # encoding: [0xc3]
1069;
1070; X64-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
1071; X64:       # %bb.0:
1072; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1073; X64-NEXT:    vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
1074; X64-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
1075; X64-NEXT:    retq # encoding: [0xc3]
1076  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x1
1077  %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
1078  %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %1, <16 x float> %2)
1079  %4 = bitcast i16 %x3 to <16 x i1>
1080  %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %x0
1081  ret <16 x float> %5
1082}
1083
1084define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
1085; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
1086; X86:       # %bb.0:
1087; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1088; X86-NEXT:    vfnmsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xbe,0xd1]
1089; X86-NEXT:    # zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
1090; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1091; X86-NEXT:    retl # encoding: [0xc3]
1092;
1093; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
1094; X64:       # %bb.0:
1095; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1096; X64-NEXT:    vfnmsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xbe,0xd1]
1097; X64-NEXT:    # zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
1098; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1099; X64-NEXT:    retq # encoding: [0xc3]
1100  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x0
1101  %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
1102  %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %1, <16 x float> %x1, <16 x float> %2)
1103  %4 = bitcast i16 %x3 to <16 x i1>
1104  %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %x2
1105  ret <16 x float> %5
1106}
1107
1108define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
1109; X86-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
1110; X86:       # %bb.0:
1111; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1112; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1113; X86-NEXT:    vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
1114; X86-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
1115; X86-NEXT:    retl # encoding: [0xc3]
1116;
1117; X64-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
1118; X64:       # %bb.0:
1119; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1120; X64-NEXT:    vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
1121; X64-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
1122; X64-NEXT:    retq # encoding: [0xc3]
1123  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x1
1124  %2 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %1, <8 x double> %x2)
1125  %3 = bitcast i8 %x3 to <8 x i1>
1126  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %x0
1127  ret <8 x double> %4
1128}
1129
1130define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
1131; X86-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
1132; X86:       # %bb.0:
1133; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1134; X86-NEXT:    vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
1135; X86-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
1136; X86-NEXT:    retl # encoding: [0xc3]
1137;
1138; X64-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
1139; X64:       # %bb.0:
1140; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1141; X64-NEXT:    vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
1142; X64-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
1143; X64-NEXT:    retq # encoding: [0xc3]
1144  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x1
1145  %2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %1, <16 x float> %x2)
1146  %3 = bitcast i16 %x3 to <16 x i1>
1147  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %x0
1148  ret <16 x float> %4
1149}
1150
1151; This test case used to crash due to combineFMA not bitcasting results of isFNEG.
1152define <4 x float> @foo() {
1153; X86-LABEL: foo:
1154; X86:       # %bb.0: # %entry
1155; X86-NEXT:    vmovss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
1156; X86-NEXT:    # xmm0 = mem[0],zero,zero,zero
1157; X86-NEXT:    vfmsub213ss {rd-sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x38,0xab,0xc0]
1158; X86-NEXT:    retl # encoding: [0xc3]
1159;
1160; X64-LABEL: foo:
1161; X64:       # %bb.0: # %entry
1162; X64-NEXT:    vmovss (%rax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
1163; X64-NEXT:    # xmm0 = mem[0],zero,zero,zero
1164; X64-NEXT:    vfmsub213ss {rd-sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x38,0xab,0xc0]
1165; X64-NEXT:    retq # encoding: [0xc3]
1166entry:
1167  %0 = load <4 x float>, <4 x float>* undef, align 16
1168  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
1169  %1 = extractelement <4 x float> %sub, i64 0
1170  %2 = call float @llvm.x86.avx512.vfmadd.f32(float undef, float undef, float %1, i32 9)
1171  %3 = select i1 extractelement (<8 x i1> bitcast (<1 x i8> <i8 1> to <8 x i1>), i64 0), float %2, float undef
1172  %4 = insertelement <4 x float> undef, float %3, i64 0
1173  ret <4 x float> %4
1174}
1175
1176; Function Attrs: nounwind readnone
1177declare float @llvm.x86.avx512.vfmadd.f32(float, float, float, i32)
1178
1179declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)
1180declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>)
1181