• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
//===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 AVX512 instruction set, defining the
// instructions, and properties of the instructions which are needed for code
// generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//
15
// Group template arguments that can be derived from the vector type (EltNum x
// EltVT).  These are things like the register class for the writemask, etc.
// The idea is to pass one of these as the template argument rather than the
// individual arguments.
// The template is also used for scalar types, in this case numelts is 1.
class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
                      string suffix = ""> {
  // Register class holding the full vector (e.g. VR512 for 512-bit types).
  RegisterClass RC = rc;
  // Element value type, e.g. i32 for v16i32.
  ValueType EltVT = eltvt;
  // Number of vector elements (1 for the scalar-type records).
  int NumElts = numelts;

  // Corresponding mask register class, e.g. VK16 for 16 elements.
  RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);

  // Corresponding write-mask register class, e.g. VK16WM.
  RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");

  // The mask VT, e.g. v16i1 for 16 elements.
  ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");

  // Suffix used in the instruction mnemonic.
  string Suffix = suffix;

  // VTName is a string name for vector VT. For vector types it will be
  // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
  // It is a little bit complex for scalar types, where NumElts = 1.
  // In this case we build v4f32 or v2f64
  string VTName = "v" # !if (!eq (NumElts, 1),
                        !if (!eq (EltVT.Size, 32), 4,
                        !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;

  // The vector VT, looked up from the name built above.
  ValueType VT = !cast<ValueType>(VTName);

  // Element type name as a string, e.g. "i32" or "f64".
  string EltTypeName = !cast<string>(EltVT);
  // Size of the element type in bits as a string, e.g. "32" for v16i32.
  // Built by stripping the "i"/"f" prefix from the element type name.
  string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
  // Size of the element type in bits as an integer, e.g. 32 for v16i32.
  int EltSize = EltVT.Size;

  // "i" for integer types and "f" for floating-point types
  string TypeVariantName = !subst(EltSizeName, "", EltTypeName);

  // Size of RC in bits, e.g. 512 for VR512.
  int Size = VT.Size;

  // The corresponding memory operand, e.g. i512mem for VR512.
  X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
  // Memory operand matching a single element, e.g. i32mem.
  X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
  // FP scalar memory operand for intrinsics - ssmem/sdmem.
  // Unset ('?') for non-FP element types.
  Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));

  // Load patterns
  // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
  //       due to load promotion during legalization
  PatFrag LdFrag = !cast<PatFrag>("load" #
                                  !if (!eq (TypeVariantName, "i"),
                                       !if (!eq (Size, 128), "v2i64",
                                       !if (!eq (Size, 256), "v4i64",
                                       !if (!eq (Size, 512), "v8i64",
                                            VTName))), VTName));

  // Aligned-load variant of LdFrag, same integer-VT canonicalization.
  PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
                                         !if (!eq (TypeVariantName, "i"),
                                               !if (!eq (Size, 128), "v2i64",
                                               !if (!eq (Size, 256), "v4i64",
                                               !if (!eq (Size, 512), "v8i64",
                                                   VTName))), VTName));

  // Load of a single element, e.g. loadi32.
  PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);

  // Complex pattern for scalar FP intrinsic memory forms (sse_load_f32/f64).
  // Unset ('?') for non-FP element types.
  ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
                                          !cast<ComplexPattern>("sse_load_f32"),
                                    !if (!eq (EltTypeName, "f64"),
                                          !cast<ComplexPattern>("sse_load_f64"),
                                    ?));

  // The string to specify embedded broadcast in assembly.
  string BroadcastStr = "{1to" # NumElts # "}";

  // 8-bit compressed displacement tuple/subvector format.  This is only
  // defined for NumElts <= 8 (the !srl-by-4 test is NumElts < 16; element
  // counts are powers of two, so that is equivalent).
  CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
                               !cast<CD8VForm>("CD8VT" # NumElts), ?);

  // Subregister index for extracting this vector from a wider register.
  // Unset ('?') for 512-bit vectors, which are not subregisters of anything.
  SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
                          !if (!eq (Size, 256), sub_ymm, ?));

  // Execution domain derived from the element type.
  Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
                     !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
                     SSEPackedInt));

  // Scalar FP register class matching the element type (FR32X/FR64X).
  // NOTE(review): for integer element types this falls through to FR64X;
  // presumably unused in that case.
  RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);

  // A vector type of the same width with element type i64. This is used to
  // create patterns for logic ops.
  ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");

  // A vector type of the same width with element type i32.  This is used to
  // create the canonical constant zero node ImmAllZerosV.
  ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
  dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));

  // Instruction-name suffix encoding the vector width (Z128/Z256/Z).
  string ZSuffix = !if (!eq (Size, 128), "Z128",
                   !if (!eq (Size, 256), "Z256", "Z"));
}
122
// 512-bit vector VT info records (RC = VR512).
def v64i8_info  : X86VectorVTInfo<64,  i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info  : X86VectorVTInfo<8,  i64, VR512, "q">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info  : X86VectorVTInfo<8,  f64, VR512, "pd">;
129
// "x" in v32i8x_info means RC = VR256X
// 256-bit vector VT info records.
def v32i8x_info  : X86VectorVTInfo<32,  i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info  : X86VectorVTInfo<8,  i32, VR256X, "d">;
def v4i64x_info  : X86VectorVTInfo<4,  i64, VR256X, "q">;
def v8f32x_info  : X86VectorVTInfo<8,  f32, VR256X, "ps">;
def v4f64x_info  : X86VectorVTInfo<4,  f64, VR256X, "pd">;

// 128-bit vector VT info records (RC = VR128X).
def v16i8x_info  : X86VectorVTInfo<16,  i8, VR128X, "b">;
def v8i16x_info  : X86VectorVTInfo<8,  i16, VR128X, "w">;
def v4i32x_info  : X86VectorVTInfo<4,  i32, VR128X, "d">;
def v2i64x_info  : X86VectorVTInfo<2,  i64, VR128X, "q">;
def v4f32x_info  : X86VectorVTInfo<4,  f32, VR128X, "ps">;
def v2f64x_info  : X86VectorVTInfo<2,  f64, VR128X, "pd">;
144
// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows to use the same masking logic.
// (NumElts = 1 triggers the scalar VTName logic in X86VectorVTInfo.)
def i32x_info    : X86VectorVTInfo<1,  i32, GR32, "si">;
def i64x_info    : X86VectorVTInfo<1,  i64, GR64, "sq">;
def f32x_info    : X86VectorVTInfo<1,  f32, VR128X, "ss">;
def f64x_info    : X86VectorVTInfo<1,  f64, VR128X, "sd">;
151
// Bundles the 512/256/128-bit VT info records for one element type, so a
// single template argument can drive all three VL (vector-length) variants.
class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
                           X86VectorVTInfo i128> {
  X86VectorVTInfo info512 = i512;
  X86VectorVTInfo info256 = i256;
  X86VectorVTInfo info128 = i128;
}
158
// Per-element-type bundles of the 512/256/128-bit VT info records above.
def avx512vl_i8_info  : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
                                             v16i8x_info>;
def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
                                             v8i16x_info>;
def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
                                             v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
                                             v2i64x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
                                             v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
                                             v2f64x_info>;
171
// Groups the register classes and value type of a mask (k-register) vector,
// analogous to X86VectorVTInfo but for mask-only instructions.
class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
                       ValueType _vt> {
  // Plain mask register class, e.g. VK16.
  RegisterClass KRC = _krc;
  // Write-mask register class, e.g. VK16WM.
  RegisterClass KRCWM = _krcwm;
  // Mask value type, e.g. v16i1.
  ValueType KVT = _vt;
}
178
// Mask VT info records for every supported mask width (1 to 64 bits).
def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
186
// This multiclass generates the masking variants from the non-masking
// variant.  It only provides the assembly pieces for the masking variants.
// It assumes custom ISel patterns for masking which can be provided as
// template arguments.  Emits three records: NAME (unmasked), NAME#k
// (merge-masking, EVEX.K) and NAME#kz (zero-masking, EVEX.KZ).
multiclass AVX512_maskable_custom<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  list<dag> ZeroMaskingPattern,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> {
  // Unmasked form.
  let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

  // Merge-masking form: "$dst {${mask}}".
  // Prefer over VMOV*rrk Pat<>
  let isCommutable = IsKCommutable in
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>,
              EVEX_K {
      // In case of the 3src subclass this is overridden with a let.
      string Constraints = MaskingConstraint;
    }

  // Zero-masking form: "$dst {${mask}} {z}".
  // Zero mask does not add any restrictions to commute operands transformation.
  // So, it is Ok to use IsCommutable instead of IsKCommutable.
  let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
    def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
                                     "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
                       ZeroMaskingPattern>,
              EVEX_KZ;
}
229
230
// Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three pattern lists from RHS/MaskingRHS; the zero-masking
// pattern selects between RHS and the all-zeros vector under the mask.
multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns, dag ZeroMaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  SDNode Select = vselect,
                                  string MaskingConstraint = "",
                                  bit IsCommutable = 0,
                                  bit IsKCommutable = 0,
                                  bit IsKZCommutable = IsCommutable> :
  AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.RC:$dst, RHS)],
                         [(set _.RC:$dst, MaskingRHS)],
                         [(set _.RC:$dst,
                               (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
                         MaskingConstraint, IsCommutable,
                         IsKCommutable, IsKZCommutable>;
251
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// This version uses a separate dag for non-masking and masking.
multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskRHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           SDNode Select = vselect> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          // Masking ins: $src0 (pass-through, tied to $dst)
                          // and $mask are prepended to the normal operands.
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                              (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable, IsKCommutable>;
272
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0, bit IsKCommutable = 0,
                           bit IsKZCommutable = IsCommutable,
                           SDNode Select = vselect> :
   AVX512_maskable_common<O, F, _, Outs, Ins,
                          // Prepend $src0 (pass-through) and $mask operands.
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (Select _.KRCWM:$mask, RHS, _.RC:$src0),
                          Select, "$src0 = $dst", IsCommutable, IsKCommutable,
                          IsKZCommutable>;
290
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.  Uses X86selects
// instead of vselect, and disables k-commuting (IsKCommutable = 0).
multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS,
                           bit IsCommutable = 0> :
   AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
                   RHS, IsCommutable, 0, IsCommutable, X86selects>;
300
// Similar to AVX512_maskable but in this case one of the source operands
// ($src1) is already tied to $dst so we just use that for the preserved
// vector elements.  NOTE that the NonTiedIns (the ins dag) should exclude
// $src1.
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
                                dag Outs, dag NonTiedIns, string OpcodeStr,
                                string AttSrcAsm, string IntelSrcAsm,
                                dag RHS,
                                bit IsCommutable = 0,
                                bit IsKCommutable = 0,
                                SDNode Select = vselect,
                                bit MaskOnly = 0> :
   AVX512_maskable_common<O, F, _, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          // MaskOnly suppresses the unmasked pattern.
                          !if(MaskOnly, (null_frag), RHS),
                          (Select _.KRCWM:$mask, RHS, _.RC:$src1),
                          Select, "", IsCommutable, IsKCommutable>;
321
// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
// NOTE: The unmasked pattern is disabled (null_frag).
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
                                     X86VectorVTInfo InVT,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common<O, F, OutVT, Outs,
                          !con((ins InVT.RC:$src1), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
                          (vselect InVT.KRCWM:$mask, RHS,
                           (bitconvert InVT.RC:$src1)),
                           vselect, "", IsCommutable>;
339
// Scalar version of AVX512_maskable_3src: identical except it selects with
// X86selects instead of vselect.
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
                                     dag Outs, dag NonTiedIns, string OpcodeStr,
                                     string AttSrcAsm, string IntelSrcAsm,
                                     dag RHS,
                                     bit IsCommutable = 0,
                                     bit IsKCommutable = 0,
                                     bit MaskOnly = 0> :
   AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
                        IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
                        X86selects, MaskOnly>;
350
// Assembly-only maskable variant: provides the unmasked pattern but leaves
// the masking/zero-masking pattern lists empty (masking forms are asm-only).
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs, dag Ins,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "$src0 = $dst">;
361
// Assembly-only 3-source maskable variant: $src1 doubles as the pass-through
// operand, and the masking/zero-masking pattern lists are left empty.
multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
                                       dag Outs, dag NonTiedIns,
                                       string OpcodeStr,
                                       string AttSrcAsm, string IntelSrcAsm,
                                       list<dag> Pattern> :
   AVX512_maskable_custom<O, F, Outs,
                          !con((ins _.RC:$src1), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
                          "">;
373
// Instruction with mask that puts result in mask register,
// like "compare" and "vptest".  Emits only NAME and NAME#k (no zero-masking
// form, since the destination is itself a mask register).
multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  list<dag> Pattern,
                                  list<dag> MaskingPattern,
                                  bit IsCommutable = 0> {
    // Unmasked form.
    let isCommutable = IsCommutable in
    def NAME: AVX512<O, F, Outs, Ins,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
                                     "$dst, "#IntelSrcAsm#"}",
                       Pattern>;

    // Masked form: the mask acts as a predicate on the comparison.
    def NAME#k: AVX512<O, F, Outs, MaskingIns,
                       OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
                                     "$dst {${mask}}, "#IntelSrcAsm#"}",
                       MaskingPattern>, EVEX_K;
}
395
// Builds the pattern lists for mask-result instructions: results go into
// the mask register class (_.KRC) rather than the vector register class.
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                                  dag Outs,
                                  dag Ins, dag MaskingIns,
                                  string OpcodeStr,
                                  string AttSrcAsm, string IntelSrcAsm,
                                  dag RHS, dag MaskingRHS,
                                  bit IsCommutable = 0> :
  AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
                         AttSrcAsm, IntelSrcAsm,
                         [(set _.KRC:$dst, RHS)],
                         [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
407
// Standard mask-result maskable instruction: the masked pattern ANDs the
// incoming mask with the comparison result.
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, bit IsCommutable = 0> :
   AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
                          (and _.KRCWM:$mask, RHS), IsCommutable>;
416
// Assembly-only mask-result variant: both pattern lists are empty.
multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm> :
   AVX512_maskable_custom_cmp<O, F, Outs,
                             Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
                             AttSrcAsm, IntelSrcAsm, [], []>;
423
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction.  In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
// Separate RHS/MaskedRHS dags let logic ops use a different pattern when
// masked (e.g. with bitcasts folded differently).
multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
                           dag Outs, dag Ins, string OpcodeStr,
                           string AttSrcAsm, string IntelSrcAsm,
                           dag RHS, dag MaskedRHS,
                           bit IsCommutable = 0, SDNode Select = vselect> :
   AVX512_maskable_custom<O, F, Outs, Ins,
                          !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
                          !con((ins _.KRCWM:$mask), Ins),
                          OpcodeStr, AttSrcAsm, IntelSrcAsm,
                          [(set _.RC:$dst, RHS)],
                          [(set _.RC:$dst,
                                (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
                          [(set _.RC:$dst,
                                (Select _.KRCWM:$mask, MaskedRHS,
                                        _.ImmAllZerosV))],
                          "$src0 = $dst", IsCommutable>;
443
444
// Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDomainFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
// 512-bit all-zeros pseudo.
def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllZerosV))]>;
// 512-bit all-ones pseudo.
def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
               [(set VR512:$dst, (v16i32 immAllOnesV))]>;
}
457
// Alias instructions that allow VPTERNLOG to be used with a mask to create
// a mix of all ones and all zeros elements. This is done this way to force
// the same register to be used as input for all three sources.
let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
// Mask-driven sign-extension to 32-bit elements: -1 where mask set, 0 else.
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK16WM:$mask), "",
                           [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
                                                      (v16i32 immAllOnesV),
                                                      (v16i32 immAllZerosV)))]>;
// 64-bit element version; the constants are bitcast from the v16i32 forms.
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                (ins VK8WM:$mask), "",
                [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
                                           (bc_v8i64 (v16i32 immAllOnesV)),
                                           (bc_v8i64 (v16i32 immAllZerosV))))]>;
}
473
// 128/256-bit all-zeros pseudos, same expansion strategy as AVX512_512_SET0.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
               [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
               [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}
481
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
  // Scalar single-precision +0.0.
  def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                          [(set FR32X:$dst, fp32imm0)]>;
  // Scalar double-precision +0.0.
  def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
                          [(set FR64X:$dst, fpimm0)]>;
}
491
492//===----------------------------------------------------------------------===//
493// AVX-512 - VECTOR INSERT
494//
495
// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
// Emits the register (rr) and memory (rm) forms of a VINSERT instruction
// inserting a From-sized subvector into a To-sized vector at an immediate
// lane index ($src3).
multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
                                  X86VectorVTInfo To,
                                  SDPatternOperator vinsert_insert,
                                  SDPatternOperator vinsert_for_mask,
                                  X86FoldableSchedWrite sched> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register form.
    defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                                         (From.VT From.RC:$src2),
                                         (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                                           (From.VT From.RC:$src2),
                                           (iPTR imm))>,
                   AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
    // Memory form: $src2 is loaded via the subvector's load fragment.
    let mayLoad = 1 in
    defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
                   (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
                   "vinsert" # From.EltTypeName # "x" # From.NumElts,
                   "$src3, $src2, $src1", "$src1, $src2, $src3",
                   (vinsert_insert:$src3 (To.VT To.RC:$src1),
                               (From.VT (bitconvert (From.LdFrag addr:$src2))),
                               (iPTR imm)),
                   (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
                               (From.VT (bitconvert (From.LdFrag addr:$src2))),
                               (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
                   EVEX_CD8<From.EltSize, From.CD8TupleForm>,
                   Sched<[sched.Folded, ReadAfterLd]>;
  }
}
530
// Passes the same pattern operator for masked and unmasked ops.
multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
                            X86VectorVTInfo To,
                            SDPatternOperator vinsert_insert,
                            X86FoldableSchedWrite sched> :
  vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
537
// Adds codegen-only Pat<>s mapping a subvector-insert node onto an existing
// VINSERT instruction (named InstrStr) for alternative element types.
// INSERT_get_vinsert_imm converts the node's insertion index into the
// instruction's immediate operand.
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                       X86VectorVTInfo To, PatFrag vinsert_insert,
                       SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
  let Predicates = p in {
    // Register-source pattern -> InstrStr#"rr".
    def : Pat<(vinsert_insert:$ins
                     (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                     To.RC:$src1, From.RC:$src2,
                     (INSERT_get_vinsert_imm To.RC:$ins)))>;

    // Memory-source pattern -> InstrStr#"rm".
    def : Pat<(vinsert_insert:$ins
                  (To.VT To.RC:$src1),
                  (From.VT (bitconvert (From.LdFrag addr:$src2))),
                  (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rm")
                  To.RC:$src1, addr:$src2,
                  (INSERT_get_vinsert_imm To.RC:$ins)))>;
  }
}
557
// Instantiates all width/element-type combinations of a VINSERT family
// (32x4/64x4 baseline forms plus the DQI-only 64x2/32x8 forms).
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
                            ValueType EltVT64, int Opcode256,
                            X86FoldableSchedWrite sched> {

  // 128-bit into 256-bit, 32-bit elements (requires VLX).
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 vinsert128_insert, sched>, EVEX_V256;

  // 128-bit into 512-bit, 32-bit elements.
  defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 vinsert128_insert, sched>, EVEX_V512;

  // 256-bit into 512-bit, 64-bit elements.
  defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 vinsert256_insert, sched>, VEX_W, EVEX_V512;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
                                   X86VectorVTInfo< 2, EltVT64, VR128X>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   null_frag, vinsert128_insert, sched>,
                                   VEX_W1X, EVEX_V256;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 null_frag, vinsert128_insert, sched>,
                                 VEX_W, EVEX_V512;

    defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
                                   X86VectorVTInfo< 8, EltVT32, VR256X>,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   null_frag, vinsert256_insert, sched>,
                                   EVEX_V512;
  }
}
601
// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
// Instantiate the floating-point family (opcodes 0x18/0x1a) and the integer
// family (opcodes 0x38/0x3a) of the EVEX insert-subvector instructions.
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
605
// Codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations: the
// 32x4/64x4 instructions can implement any unmasked f64/i64 (and narrower
// integer) subvector insert via these extra-type mappings.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;

defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;

defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;

// Codegen patterns with the alternative i16/i8 types: insert VEC128 into VEC256.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
// Codegen patterns with the alternative i16/i8 types: insert VEC128 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
              vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
               vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
// Codegen patterns with the alternative i16/i8 types: insert VEC256 into VEC512.
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
              vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
638
639
// Patterns for a masked subvector insert whose result is observed through a
// bitcast: the vselect (mask merge/zero) operates on Cast.VT while the insert
// itself produces To.VT. This lets a single masked VINSERT* instruction cover
// element types that differ from the instruction's nominal granularity.
// InstrStr names the instruction; From/To are the insert's source/result
// types; Cast is the type the mask applies to.
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
                                 PatFrag vinsert_insert,
                                 SDNodeXForm INSERT_get_vinsert_imm,
                                 list<Predicate> p> {
let Predicates = p in {
  // Merge-masking, register source -> "rrk".
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rrk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Merge-masking with the inserted subvector loaded from memory -> "rmk".
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.RC:$src0)),
            (!cast<Instruction>(InstrStr#"rmk")
             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;

  // Zero-masking, register source -> "rrkz".
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT From.RC:$src2),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rrkz")
             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
  // Zero-masking with a memory source -> "rmkz".
  def : Pat<(Cast.VT
             (vselect Cast.KRCWM:$mask,
                      (bitconvert
                       (vinsert_insert:$ins (To.VT To.RC:$src1),
                                            (From.VT
                                             (bitconvert
                                              (From.LdFrag addr:$src2))),
                                            (iPTR imm))),
                      Cast.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#"rmkz")
             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
             (INSERT_get_vinsert_imm To.RC:$ins))>;
}
}
693
// Instantiations of vinsert_for_mask_cast: every (From, To, Cast) combination
// where a masked VINSERT* result is seen through a bitcast. Grouped as
// 128->256 inserts (VLX, plus DQI for the 64x2 forms), then 128->512, then
// 256->512 (AVX512, plus DQI for the 64x2/32x8 forms).
defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
                             v8f32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
                             v4f64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
                             v8i32x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
                             v4i64x_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;

defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
                             v16f32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
                             v8f64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
                             v16i32_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
                             v8i64_info, vinsert128_insert,
                             INSERT_get_vinsert128_imm, [HasDQI]>;

defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
                             v16f32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
                             v8f64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;

defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
                             v16i32_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasDQI]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                             v8i64_info, vinsert256_insert,
                             INSERT_get_vinsert256_imm, [HasAVX512]>;
771
// vinsertps - insert f32 to XMM (EVEX encoding of INSERTPS).
// The u8imm selects the source/destination element and zero mask, matched
// via the X86insertps node.
let ExeDomain = SSEPackedSingle in {
// Register-register form.
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
      EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form: loads a scalar f32 and inserts it.
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
      (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1,
                          (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                          imm:$src3))]>,
      EVEX_4V, EVEX_CD8<32, CD8VT1>,
      Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
}
788
789//===----------------------------------------------------------------------===//
790// AVX-512 VECTOR EXTRACT
791//---
792
// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one (to suppress either the masked or the
// unmasked selection patterns while still emitting the instruction).
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   SchedWrite SchedRR, SchedWrite SchedMR> {

  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register destination, with the full masked/unmasked variant set
    // generated by AVX512_maskable_split.
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
                AVX512AIi8Base, EVEX, Sched<[SchedRR]>;

    // Unmasked store-to-memory form.
    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                             addr:$dst)]>, EVEX,
                    Sched<[SchedMR]>;

    // Masked store form; no selection pattern, emitted only via intrinsics
    // or manual instruction creation.
    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, To.KRCWM:$mask,
                                        From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}", []>,
                    EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
  }
}
829
// Passes the same pattern operator for masked and unmasked ops (convenience
// wrapper around vextract_for_size_split).
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             SchedWrite SchedRR, SchedWrite SchedMR> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
836
// Codegen patterns for the alternative types: maps an extract of From.VT to
// the already-defined instruction named InstrStr, for both the
// register-destination ("rr") and store-to-memory ("mr") forms.
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
     // Register destination.
     def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
               (To.VT (!cast<Instruction>(InstrStr#"rr")
                          From.RC:$src1,
                          (EXTRACT_get_vextract_imm To.RC:$ext)))>;
     // Extract folded into a store.
     def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                              (iPTR imm))), addr:$dst),
               (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
                (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
852
// Builds one VEXTRACT instruction family (VEXTRACTF* or VEXTRACTI*) from an
// element-type pair, mirroring vinsert_for_type: EltVT32/Opcode128 for the
// 128-bit-granule extracts, EltVT64/Opcode256 for the 256-bit-granule ones.
// The DQI-only 64x2/32x8 forms use null_frag for the unmasked operator so
// they are selected only when masking is required.
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             SchedWrite SchedRR, SchedWrite SchedMR> {
  let Predicates = [HasAVX512] in {
    // 32x4 from 512-bit source: ZMM -> XMM.
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                   X86VectorVTInfo<16, EltVT32, VR512>,
                                   X86VectorVTInfo< 4, EltVT32, VR128X>,
                                   vextract128_extract, SchedRR, SchedMR>,
                                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
    // 64x4 from 512-bit source: ZMM -> YMM.
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                   X86VectorVTInfo< 8, EltVT64, VR512>,
                                   X86VectorVTInfo< 4, EltVT64, VR256X>,
                                   vextract256_extract, SchedRR, SchedMR>,
                                       VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  // 32x4 from 256-bit source: YMM -> XMM (requires VLX).
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, SchedRR, SchedMR>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    // 64x2 from 512-bit source, masked-only.
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, SchedRR, SchedMR>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    // 32x8 from 512-bit source, masked-only.
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, SchedRR, SchedMR>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
897
// TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
// Instantiate the floating-point family (opcodes 0x19/0x1b) and the integer
// family (opcodes 0x39/0x3b) of the EVEX extract-subvector instructions.
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
901
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations: the
// 32x4/64x4 instructions can implement any unmasked f64/i64 (and narrower
// integer) subvector extract via these extra-type mappings.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative i16/i8 types: extract VEC128 from VEC256.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen patterns with the alternative i16/i8 types: extract VEC128 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen patterns with the alternative i16/i8 types: extract VEC256 from VEC512.
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
935
936
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX. Without VLX we first take the low YMM
// subregister, then use the AVX (VEX-encoded) VEXTRACT*128 with index 1.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
965
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX. With VLX we take the low YMM
// subregister, then use the 256-bit VEXTRACT*32x4 form with index 1.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
994
995
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector: the mask operates on Cast.VT while the extract itself
// produces To.VT. Only register forms are covered here ("rrk"/"rrkz").
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masking -> "rrk".
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masking -> "rrkz".
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
1023
// Instantiations of vextract_for_mask_cast: every (From, To, Cast)
// combination where a masked VEXTRACT* result is seen through a bitcast.
// Grouped as 256->128 extracts (VLX, plus DQI for the 64x2 forms), then
// 512->128, then 512->256 (AVX512, plus DQI for the 64x2/32x8 forms).
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
1101
// vextractps - extract 32 bits from XMM (EVEX encoding of EXTRACTPS).
// The u8imm selects which 32-bit element is extracted; matched via an
// extractelt of the v4f32 source bitcast to v4i32.
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
      (ins VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
      EVEX, VEX_WIG, Sched<[WriteVecExtract]>;

// Store form: the extracted element is written directly to memory.
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
                          addr:$dst)]>,
      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1115
1116//===---------------------------------------------------------------------===//
1117// AVX-512 BROADCAST
1118//---
// Broadcast with a scalar (FRC) argument: maps X86VBroadcast of a scalar
// register onto the existing register-form broadcast instructions, copying
// the scalar into the vector register class first. Covers the unmasked,
// merge-masked ("rk") and zero-masked ("rkz") forms.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                            string Name,
                            X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  // Unmasked broadcast of a scalar register.
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Merge-masking: unselected elements come from $src0.
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.RC:$src0)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
  // Zero-masking: unselected elements are zeroed.
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
             DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
}
1138
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
// MaskInfo describes the result/write-mask type, DestInfo the type the
// broadcast node produces (bitconverted into MaskInfo.VT), and SrcInfo the
// source operand. UnmaskedOp lets DQ-only forms pass null_frag to disable
// the unmasked patterns.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     string Name,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast> {
  let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
  // Register-source form.
  defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
                   T8PD, EVEX, Sched<[SchedRR]>;
  // Memory-source form: broadcasts a single scalar loaded from memory.
  let mayLoad = 1 in
  defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (UnmaskedOp
                                   (SrcInfo.ScalarLdFrag addr:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (X86VBroadcast
                                   (SrcInfo.ScalarLdFrag addr:$src)))))>,
                   T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
                   Sched<[SchedRM]>;
  }

  // Fold scalar_to_vector of a scalar load into the memory form.
  def : Pat<(MaskInfo.VT
             (bitconvert
              (DestInfo.VT (UnmaskedOp
                            (SrcInfo.VT (scalar_to_vector
                                         (SrcInfo.ScalarLdFrag addr:$src))))))),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#m) addr:$src)>;
  // Merge-masked memory form. The instructions above are defined under
  // MaskInfo, so the name must be built with MaskInfo.ZSuffix (previously
  // DestInfo.ZSuffix; identical for all current instantiations, but only
  // MaskInfo matches the actual instruction definitions).
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.RC:$src0)),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#mk)
             MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
  // Zero-masked memory form.
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.ImmAllZerosV)),
            (!cast<Instruction>(Name#MaskInfo.ZSuffix#mkz)
             MaskInfo.KRCWM:$mask, addr:$src)>;
}
1202
// Helper class to force mask and broadcast result to same type.
// Thin wrapper over avx512_broadcast_rm_split with MaskInfo == DestInfo.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo> :
  avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo>;
1210
// f64 scalar broadcast (vbroadcastsd): 512-bit form under AVX512, 256-bit
// under VLX. No 128-bit form is instantiated here (VBROADCASTSD has no
// xmm-destination encoding).
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                                       AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128>,
              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                      _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                         _.info128>,
                 EVEX_V256;
  }
}
1229
// f32 scalar broadcast (vbroadcastss): like avx512_fp_broadcast_sd but also
// instantiates a 128-bit (Z128) form under VLX.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                                       AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128>,
              avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
                                      _.info128>,
              EVEX_V512;
  }

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
                                         _.info128>,
                 EVEX_V256;
    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle,
                                     WriteFShuffle256Ld, _.info128, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
                                         _.info128>,
                 EVEX_V128;
  }
}
// FP broadcast instantiations. VBROADCASTSD uses VEX_W1X (W=1 in EVEX,
// W-ignored in the VEX space).
defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                       avx512vl_f32_info>;
defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                       avx512vl_f64_info>, VEX_W1X;
1257
// Integer broadcast from a general-purpose register (r form only), with the
// standard masked/zero-masked variants produced by AVX512_maskable.
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins SrcRC:$src),
                         "vpbroadcast"##_.Suffix, "$src", "$src",
                         (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
                         Sched<[SchedRR]>;
}
1268
// Byte/word GPR broadcast. There is no GR8/GR16 encoding, so the
// instruction takes GR32 (patterns below insert the narrow register into an
// implicit-def GR32 via INSERT_SUBREG); hence the instruction itself carries
// no ISel pattern and the three Pats provide unmasked/merge/zero selection.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                        (outs _.RC:$dst), (ins GR32:$src),
                        !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                        !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                        "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                        "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  // Unmasked broadcast.
  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Merge-masked broadcast.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Zero-masked broadcast.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
1292
// Vector-length expansion of the byte/word GPR broadcast: 512-bit under
// `prd`, 256/128-bit additionally gated on HasVLX.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
              OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
              _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
              _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}
1306
// Vector-length expansion of the dword/qword GPR broadcast.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}
1320
// GPR-source broadcasts. B/W forms need AVX512BW; D and Q share opcode 0x7C
// and are distinguished by the REX.W (VEX_W) bit.
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;
1330
// Provide aliases for broadcast from the same register class that
// automatically does the extract.
// I.e. broadcasting lane 0 of a YMM/ZMM source is lowered to the XMM-source
// broadcast instruction after extracting the low 128 bits.
multiclass avx512_int_broadcast_rm_lowering<string Name,
                                            X86VectorVTInfo DestInfo,
                                            X86VectorVTInfo SrcInfo,
                                            X86VectorVTInfo ExtInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
            (!cast<Instruction>(Name#DestInfo.ZSuffix#"r")
                (ExtInfo.VT (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm)))>;
}
1341
// Vector-length expansion of the XMM-source integer broadcasts, plus the
// wide-register lowering aliases for the 256/512-bit destinations.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                   WriteShuffle256Ld, _.info512, _.info128>,
               avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info256, _.info128>,
                                  EVEX_V512;
    // Defined separately to avoid redefinition.
    defm Z_Alt : avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info512, _.info128>;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128>,
                avx512_int_broadcast_rm_lowering<NAME, _.info256, _.info256, _.info128>,
                                 EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
                                    WriteShuffleXLd, _.info128, _.info128>,
                                 EVEX_V128;
  }
}
1362
// XMM/memory-source integer broadcasts (B/W require AVX512BW).
defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                           avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                           avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                           avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                           avx512vl_i64_info, HasAVX512>, VEX_W1X;
1371
// Subvector broadcast from memory (rm form only): replicates a 128/256-bit
// memory operand across the destination vector.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}
1381
// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns (null_frag) so that we only use the DQ instructions
// when masking is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (null_frag),
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
                           Sched<[SchedWriteShuffle.YMM.Folded]>,
                           AVX5128IBase, EVEX;
}
1396
let Predicates = [HasAVX512] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  // Broadcasting a zero-extending i64 load is still just a qword broadcast.
  def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;
}
1402
let Predicates = [HasVLX] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  // Same as the 512-bit pattern above, for the VLX-sized destinations.
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  // Both a plain i32 load and a zero-extending i16 load truncated back to
  // i16 read the same low 16 bits, so either folds to the word broadcast.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
}
1424
1425//===----------------------------------------------------------------------===//
1426// AVX-512 BROADCAST SUBVECTORS
1427//
1428
// 512-bit subvector broadcasts available in baseline AVX512F:
// 128-bit granularity (32x4) and 256-bit granularity (64x4).
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                       v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                       v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
1441
let Predicates = [HasAVX512] in {
// 256-bit memory subvector broadcasts of other element types reuse the 64x4
// instructions (the loads are bitcast-compatible); the element-typed 32x8
// forms would require AVX512DQ.
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// A register-source subvector broadcast is emitted as inserting the 256-bit
// source into both halves of a 512-bit register.
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4f64 VR256X:$src), 1)>;
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8f32 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4i64 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8i32 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v16i16 VR256X:$src), 1)>;
def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v32i8 VR256X:$src), 1)>;

// 128-bit memory subvector broadcasts with 64/16/8-bit elements reuse the
// 32x4 instruction; the 64x2 forms would require AVX512DQ.
def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;

// Patterns for selects of bitcasted operations.
// These match masked broadcasts where DAG bitcasts hide the direct form.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (bc_v16f32 (v16i32 immAllZerosV))),
          (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   (bc_v8f64 (v16i32 immAllZerosV))),
          (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
                   (bc_v8i64 (v16i32 immAllZerosV))),
          (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
                   VR512:$src0),
          (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1517
let Predicates = [HasVLX] in {
// 256-bit destination forms of the 32x4 subvector broadcasts.
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

// Non-i32 element types reuse the 32x4 instructions (64x2 needs AVX512DQ).
def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   (bc_v8f32 (v8i32 immAllZerosV))),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;


// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// Register-source forms become an insert of the XMM source into both halves
// of a YMM register.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v16i8 VR128X:$src), 1)>;
}
1575
let Predicates = [HasVLX, HasDQI] in {
// 256-bit destination 64x2 subvector broadcasts (DQ-only; unmasked patterns
// are disabled so the masked forms below are the only users).
// NOTE(review): the "Z128" suffix with EVEX_V256 looks like a historical
// naming inconsistency (these are the 256-bit forms) — renaming would break
// references elsewhere, so it is left as-is.
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v4f64 (v8i32 immAllZerosV))),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   (bc_v4i64 (v8i32 immAllZerosV))),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
1602
let Predicates = [HasDQI] in {
// 512-bit destination DQ subvector broadcasts (64x2 and 32x8). Unmasked
// patterns are disabled; the 32x4/64x4 AVX512F instructions cover those.
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   (bc_v16f32 (v16i32 immAllZerosV))),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v8f64 (v16i32 immAllZerosV))),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   (bc_v8i64 (v16i32 immAllZerosV))),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
                   VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
1652
// 32x2 broadcasts (DQ-only). Uses the split multiclass because the mask
// type (_Dst, 32-bit elements) differs from the broadcast node type (_Src,
// 64-bit elements); null_frag disables the unmasked patterns.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info512,
                                          _Src.info512, _Src.info128, null_frag>,
                                          EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, null_frag>,
                                          EVEX_V256;
}
1666
// Integer 32x2 broadcast: extends the common multiclass with a 128-bit form
// (the FP variant vbroadcastf32x2 has no 128-bit encoding).
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                         AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
  avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, null_frag>,
                                          EVEX_V128;
}
1677
// 32x2 broadcast instantiations: destination has 32-bit elements, source is
// described with 64-bit elements (one i64/f64 lane = the broadcast pair).
defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                          avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                          avx512vl_f32_info, avx512vl_f64_info>;
1682
let Predicates = [HasVLX] in {
// Broadcast of lane 0 of a YMM source: extract the low XMM and use the
// XMM-source broadcast instruction.
def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZ256r (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZ256r (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
}
1689
// Broadcast of lane 0 of a YMM/ZMM source into a ZMM destination: extract
// the low XMM and use the XMM-source broadcast instruction.
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
          (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>;
def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;

def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
          (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>;
def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
1699
1700//===----------------------------------------------------------------------===//
1701// AVX-512 BROADCAST MASK TO VECTOR REGISTER
1702//===----------------------------------------------------------------------===//
// Single mask-to-vector broadcast instruction (X86VBroadcastm): the mask
// register $src is expanded into every element of the vector destination.
// Register source only -- no memory form is defined for these.
1703multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1704                                  X86VectorVTInfo _, RegisterClass KRC> {
1705  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1706                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1707                  [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1708                  EVEX, Sched<[WriteShuffle]>;
1709}
1710
// Instantiate the mask broadcast at all three vector lengths.  The 512-bit
// form requires CDI; the 256/128-bit forms additionally require VLX.
1711multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1712                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1713  let Predicates = [HasCDI] in
1714    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1715  let Predicates = [HasCDI, HasVLX] in {
1716    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1717    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1718  }
1719}
1720
// AVX512CD mask-to-vector broadcasts: word mask -> dword elements (VK16) and
// byte mask -> qword elements (VK8).
1721defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1722                                               avx512vl_i32_info, VK16>;
1723defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1724                                               avx512vl_i64_info, VK8>, VEX_W;
1725
1726//===----------------------------------------------------------------------===//
1727// -- VPERMI2 - 3 source operands form --
// VPERMI2 register/memory forms.  The index vector is the tied operand
// ($src1 = $dst) and is also consumed as a source; selection uses the
// X86VPermt2 node with the index register placed as the middle operand.
1728multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1729                         X86FoldableSchedWrite sched,
1730                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1731let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1732    hasSideEffects = 0 in {
1733  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1734          (ins _.RC:$src2, _.RC:$src3),
1735          OpcodeStr, "$src3, $src2", "$src2, $src3",
1736          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1737          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1738
1739  let mayLoad = 1 in
1740  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1741            (ins _.RC:$src2, _.MemOp:$src3),
1742            OpcodeStr, "$src3, $src2", "$src2, $src3",
1743            (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1744                   (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
1745            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
1746  }
1747}
1748
// Embedded-broadcast (.rmb) form of VPERMI2: the third source is a scalar
// memory operand splatted across the vector (EVEX.b set via EVEX_B).
1749multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1750                            X86FoldableSchedWrite sched,
1751                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1752  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1753      hasSideEffects = 0, mayLoad = 1 in
1754  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1755              (ins _.RC:$src2, _.ScalarMemOp:$src3),
1756              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1757              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1758              (_.VT (X86VPermt2 _.RC:$src2,
1759               IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
1760              AVX5128IBase, EVEX_4V, EVEX_B,
1761              Sched<[sched.Folded, ReadAfterLd]>;
1762}
1763
// VPERMI2 rr/rm/rmb at all three vector lengths: 512-bit always, 128/256-bit
// under HasVLX.
1764multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1765                               X86FoldableSchedWrite sched,
1766                               AVX512VLVectorVTInfo VTInfo,
1767                               AVX512VLVectorVTInfo ShuffleMask> {
1768  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1769                           ShuffleMask.info512>,
1770            avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1771                             ShuffleMask.info512>, EVEX_V512;
1772  let Predicates = [HasVLX] in {
1773  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1774                               ShuffleMask.info128>,
1775                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1776                                  ShuffleMask.info128>, EVEX_V128;
1777  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1778                               ShuffleMask.info256>,
1779                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1780                                  ShuffleMask.info256>, EVEX_V256;
1781  }
1782}
1783
// Byte/word-element VPERMI2 variants: gated on an extra predicate (HasBWI
// for words, HasVBMI for bytes) and with no broadcast (.rmb) form, since
// EVEX embedded broadcast does not exist for 8/16-bit elements.
1784multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1785                                  X86FoldableSchedWrite sched,
1786                                  AVX512VLVectorVTInfo VTInfo,
1787                                  AVX512VLVectorVTInfo Idx,
1788                                  Predicate Prd> {
1789  let Predicates = [Prd] in
1790  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1791                           Idx.info512>, EVEX_V512;
1792  let Predicates = [Prd, HasVLX] in {
1793  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1794                               Idx.info128>, EVEX_V128;
1795  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1796                               Idx.info256>,  EVEX_V256;
1797  }
1798}
1799
// VPERMI2 instantiations.  D/Q/PS/PD use the full-size multiclass (with
// broadcast forms); W/B go through the _bw variant, gated on BWI and VBMI
// respectively.  PS/PD take integer index infos for the ShuffleMask.
1800defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1801                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1802defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1803                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1804defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1805                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1806                  VEX_W, EVEX_CD8<16, CD8VF>;
1807defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1808                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1809                  EVEX_CD8<8, CD8VF>;
1810defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1811                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1812defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1813                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1814
1815// Extra patterns to deal with extra bitcasts due to passthru and index being
1816// different types on the fp versions.
// Each pattern is the masked (rrk/rmk/rmbk) form of VPERMI2PS* where the
// tied passthru/index register was bitcast from CastVT (e.g. vXi64); the
// bitcasts are stripped so the masked instruction still matches.
1817multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1818                                  X86VectorVTInfo IdxVT,
1819                                  X86VectorVTInfo CastVT> {
1820  def : Pat<(_.VT (vselect _.KRCWM:$mask,
1821                             (X86VPermt2 (_.VT _.RC:$src2),
1822                                         (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
1823                             (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1824            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1825                                                _.RC:$src2, _.RC:$src3)>;
1826  def : Pat<(_.VT (vselect _.KRCWM:$mask,
1827                             (X86VPermt2 _.RC:$src2,
1828                                         (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1829                                         (_.LdFrag addr:$src3)),
1830                             (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1831            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1832                                                _.RC:$src2, addr:$src3)>;
1833  def : Pat<(_.VT (vselect _.KRCWM:$mask,
1834                             (X86VPermt2 _.RC:$src2,
1835                                         (IdxVT.VT (bitconvert  (CastVT.VT _.RC:$src1))),
1836                                         (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
1837                             (_.VT (bitconvert  (CastVT.VT _.RC:$src1))))),
1838            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1839                                                 _.RC:$src2, addr:$src3)>;
1840}
1841
1842// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Instantiate the bitcast-stripping patterns for VPERMI2PS at each width.
1843defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1844defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1845defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
1846
1847// VPERMT2
// Like VPERMI2, but the *table* operand is the tied one ($src1 = $dst) and
// the index vector arrives as a plain source in $src2.
1848multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1849                         X86FoldableSchedWrite sched,
1850                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1851let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1852  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1853          (ins IdxVT.RC:$src2, _.RC:$src3),
1854          OpcodeStr, "$src3, $src2", "$src2, $src3",
1855          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1856          EVEX_4V, AVX5128IBase, Sched<[sched]>;
1857
1858  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1859            (ins IdxVT.RC:$src2, _.MemOp:$src3),
1860            OpcodeStr, "$src3, $src2", "$src2, $src3",
1861            (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1862                   (bitconvert (_.LdFrag addr:$src3)))), 1>,
1863            EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
1864  }
1865}
// Embedded-broadcast (.rmb) form of VPERMT2, mirroring avx512_perm_i_mb.
1866multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1867                            X86FoldableSchedWrite sched,
1868                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1869  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1870  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1871              (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1872              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
1873              !strconcat("$src2, ${src3}", _.BroadcastStr ),
1874              (_.VT (X86VPermt2 _.RC:$src1,
1875               IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
1876              AVX5128IBase, EVEX_4V, EVEX_B,
1877              Sched<[sched.Folded, ReadAfterLd]>;
1878}
1879
// VPERMT2 rr/rm/rmb at all three vector lengths: 512-bit always, 128/256-bit
// under HasVLX (parallel to avx512_perm_i_sizes).
1880multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1881                               X86FoldableSchedWrite sched,
1882                               AVX512VLVectorVTInfo VTInfo,
1883                               AVX512VLVectorVTInfo ShuffleMask> {
1884  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1885                              ShuffleMask.info512>,
1886            avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1887                              ShuffleMask.info512>, EVEX_V512;
1888  let Predicates = [HasVLX] in {
1889  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1890                              ShuffleMask.info128>,
1891                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1892                              ShuffleMask.info128>, EVEX_V128;
1893  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1894                              ShuffleMask.info256>,
1895                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1896                              ShuffleMask.info256>, EVEX_V256;
1897  }
1898}
1899
// Byte/word-element VPERMT2 variants: extra predicate (BWI/VBMI) and no
// broadcast form, since embedded broadcast is unavailable for 8/16-bit
// elements (parallel to avx512_perm_i_sizes_bw).
1900multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1901                                  X86FoldableSchedWrite sched,
1902                                  AVX512VLVectorVTInfo VTInfo,
1903                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
1904  let Predicates = [Prd] in
1905  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1906                           Idx.info512>, EVEX_V512;
1907  let Predicates = [Prd, HasVLX] in {
1908  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1909                               Idx.info128>, EVEX_V128;
1910  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1911                               Idx.info256>, EVEX_V256;
1912  }
1913}
1914
// VPERMT2 instantiations, mirroring the VPERMI2 set above: D/Q/PS/PD with
// broadcast forms; W gated on BWI, B gated on VBMI.
1915defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1916                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1917defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1918                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1919defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1920                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1921                  VEX_W, EVEX_CD8<16, CD8VF>;
1922defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1923                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1924                  EVEX_CD8<8, CD8VF>;
1925defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1926                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1927defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1928                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1929
1930//===----------------------------------------------------------------------===//
1931// AVX-512 - BLEND using mask
1932//
1933
// Blend-with-mask instruction definitions (VBLENDMPS/PD, VPBLENDM*):
// plain, {k}-masked, and {k}{z}-zero-masked register/register and
// register/memory forms.  All patterns are intentionally empty ([]);
// NOTE(review): selection presumably happens via masked-move/vselect
// patterns elsewhere in the backend -- confirm before relying on it.
1934multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1935                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1936  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1937  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1938             (ins _.RC:$src1, _.RC:$src2),
1939             !strconcat(OpcodeStr,
1940             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1941             EVEX_4V, Sched<[sched]>;
1942  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1943             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1944             !strconcat(OpcodeStr,
1945             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1946             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1947  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1948             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1949             !strconcat(OpcodeStr,
1950             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1951             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
1952  let mayLoad = 1 in {
1953  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1954             (ins _.RC:$src1, _.MemOp:$src2),
1955             !strconcat(OpcodeStr,
1956             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1957             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1958             Sched<[sched.Folded, ReadAfterLd]>;
1959  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1960             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1961             !strconcat(OpcodeStr,
1962             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1963             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1964             Sched<[sched.Folded, ReadAfterLd]>;
1965  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1966             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1967             !strconcat(OpcodeStr,
1968             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1969             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1970             Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
1971  }
1972  }
1973}
// Embedded-broadcast memory forms of the blends (rmb/rmbk/rmbkz); only used
// for 32/64-bit elements (see blendmask_dq below).  Patterns are empty, as
// in WriteFVarBlendask.
1974multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1975                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1976  let mayLoad = 1, hasSideEffects = 0 in {
1977  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1978      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1979       !strconcat(OpcodeStr,
1980            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1981            "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1982      EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1983      Sched<[sched.Folded, ReadAfterLd]>;
1984
1985  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1986      (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1987       !strconcat(OpcodeStr,
1988            "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
1989            "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1990      EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1991      Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
1992
1993  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1994      (ins _.RC:$src1, _.ScalarMemOp:$src2),
1995       !strconcat(OpcodeStr,
1996            "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1997            "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
1998      EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1999      Sched<[sched.Folded, ReadAfterLd]>;
2000  }
2001}
2002
// Dword/qword-element blends: all register/memory forms plus the broadcast
// (rmb) forms; 512-bit unconditionally, 128/256-bit under HasVLX.
2003multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2004                        AVX512VLVectorVTInfo VTInfo> {
2005  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2006           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2007                                 EVEX_V512;
2008
2009  let Predicates = [HasVLX] in {
2010    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2011                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2012                                      EVEX_V256;
2013    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2014                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2015                                      EVEX_V128;
2016  }
2017}
2018
// Byte/word-element blends: require BWI, and omit the broadcast forms since
// embedded broadcast is unavailable for 8/16-bit elements.
2019multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2020                        AVX512VLVectorVTInfo VTInfo> {
2021  let Predicates = [HasBWI] in
2022    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2023                               EVEX_V512;
2024
2025  let Predicates = [HasBWI, HasVLX] in {
2026    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2027                                  EVEX_V256;
2028    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2029                                  EVEX_V128;
2030  }
2031}
2032
// Blend instantiations: fp and dword/qword integer blends via blendmask_dq,
// byte/word integer blends via blendmask_bw (BWI-gated).
2033defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2034                              avx512vl_f32_info>;
2035defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2036                              avx512vl_f64_info>, VEX_W;
2037defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2038                              avx512vl_i32_info>;
2039defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2040                              avx512vl_i64_info>, VEX_W;
2041defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2042                              avx512vl_i8_info>;
2043defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2044                              avx512vl_i16_info>, VEX_W;
2045
2046//===----------------------------------------------------------------------===//
2047// Compare Instructions
2048//===----------------------------------------------------------------------===//
2049
2050// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
2051
// Scalar compare-to-mask (VCMPSS/VCMPSD).  Defines:
//  - *_Int forms on full vector registers with the condition code folded
//    into the mnemonic ("vcmp${cc}ss" etc.), including an {sae} variant;
//  - asm-parser-only *_alt forms taking an explicit 8-bit immediate cc;
//  - codegen-only rr/rm forms operating on scalar FRC registers.
2052multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
2053                             X86FoldableSchedWrite sched> {
2054  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2055                      (outs _.KRC:$dst),
2056                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2057                      "vcmp${cc}"#_.Suffix,
2058                      "$src2, $src1", "$src1, $src2",
2059                      (OpNode (_.VT _.RC:$src1),
2060                              (_.VT _.RC:$src2),
2061                              imm:$cc)>, EVEX_4V, Sched<[sched]>;
2062  let mayLoad = 1 in
2063  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2064                    (outs _.KRC:$dst),
2065                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
2066                    "vcmp${cc}"#_.Suffix,
2067                    "$src2, $src1", "$src1, $src2",
2068                    (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2069                        imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
2070                    Sched<[sched.Folded, ReadAfterLd]>;
2071
  // {sae} form: suppress all floating-point exceptions (FROUND_NO_EXC).
2072  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2073                     (outs _.KRC:$dst),
2074                     (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2075                     "vcmp${cc}"#_.Suffix,
2076                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
2077                     (OpNodeRnd (_.VT _.RC:$src1),
2078                                (_.VT _.RC:$src2),
2079                                imm:$cc,
2080                                (i32 FROUND_NO_EXC))>,
2081                     EVEX_4V, EVEX_B, Sched<[sched]>;
2082  // Accept explicit immediate argument form instead of comparison code.
2083  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    // NOTE(review): uses VK1 directly where the sibling forms use _.KRC.
    // Equivalent for the f32x_info/f64x_info instantiations below (scalar
    // infos have NumElts == 1, so KRC is VK1) -- confirm intentional.
2084    defm  rri_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2085                        (outs VK1:$dst),
2086                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2087                        "vcmp"#_.Suffix,
2088                        "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V,
2089                        Sched<[sched]>, NotMemoryFoldable;
2090  let mayLoad = 1 in
2091    defm  rmi_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2092                        (outs _.KRC:$dst),
2093                        (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2094                        "vcmp"#_.Suffix,
2095                        "$cc, $src2, $src1", "$src1, $src2, $cc">,
2096                        EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
2097                        Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
2098
2099    defm  rrb_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2100                       (outs _.KRC:$dst),
2101                       (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2102                       "vcmp"#_.Suffix,
2103                       "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">,
2104                       EVEX_4V, EVEX_B, Sched<[sched]>, NotMemoryFoldable;
2105  }// let isAsmParserOnly = 1, hasSideEffects = 0
2106
  // Codegen-only forms on scalar FP registers (FRC), used when the compare
  // inputs are plain scalars rather than vector registers.
2107  let isCodeGenOnly = 1 in {
2108    let isCommutable = 1 in
2109    def rr : AVX512Ii8<0xC2, MRMSrcReg,
2110                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
2111                !strconcat("vcmp${cc}", _.Suffix,
2112                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2113                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2114                                          _.FRC:$src2,
2115                                          imm:$cc))]>,
2116                EVEX_4V, Sched<[sched]>;
2117    def rm : AVX512Ii8<0xC2, MRMSrcMem,
2118              (outs _.KRC:$dst),
2119              (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
2120              !strconcat("vcmp${cc}", _.Suffix,
2121                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2122              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2123                                        (_.ScalarLdFrag addr:$src2),
2124                                        imm:$cc))]>,
2125              EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
2126              Sched<[sched.Folded, ReadAfterLd]>;
2127  }
2128}
2129
// Instantiate scalar compares for f32 (XS prefix) and f64 (XD prefix,
// VEX.W set), each pinned to its packed execution domain.
2130let Predicates = [HasAVX512] in {
2131  let ExeDomain = SSEPackedSingle in
2132  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
2133                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2134  let ExeDomain = SSEPackedDouble in
2135  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
2136                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
2137}
2138
// Packed integer compare producing a mask (VPCMPEQ*/VPCMPGT* family):
// rr/rm plus {k}-masked rrk/rmk forms.  The masked patterns AND the
// incoming writemask with the compare result.
2139multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2140                              X86FoldableSchedWrite sched, X86VectorVTInfo _,
2141                              bit IsCommutable> {
2142  let isCommutable = IsCommutable in
2143  def rr : AVX512BI<opc, MRMSrcReg,
2144             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2145             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2146             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>,
2147             EVEX_4V, Sched<[sched]>;
2148  def rm : AVX512BI<opc, MRMSrcMem,
2149             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2150             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2151             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2152                                       (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
2153             EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
2154  let isCommutable = IsCommutable in
2155  def rrk : AVX512BI<opc, MRMSrcReg,
2156              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2157              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2158                          "$dst {${mask}}, $src1, $src2}"),
2159              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2160                                   (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
2161              EVEX_4V, EVEX_K, Sched<[sched]>;
2162  def rmk : AVX512BI<opc, MRMSrcMem,
2163              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2164              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2165                          "$dst {${mask}}, $src1, $src2}"),
2166              [(set _.KRC:$dst, (and _.KRCWM:$mask,
2167                                   (OpNode (_.VT _.RC:$src1),
2168                                       (_.VT (bitconvert
2169                                              (_.LdFrag addr:$src2))))))]>,
2170              EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
2171}
2172
// Adds embedded-broadcast (rmb/rmbk) forms on top of avx512_icmp_packed;
// only instantiated for 32/64-bit elements (D/Q compares below).
2173multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2174                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
2175                                  bit IsCommutable> :
2176           avx512_icmp_packed<opc, OpcodeStr, OpNode, sched, _, IsCommutable> {
2177  def rmb : AVX512BI<opc, MRMSrcMem,
2178              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2179              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2180                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2181              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2182                              (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
2183              EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
2184  def rmbk : AVX512BI<opc, MRMSrcMem,
2185               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2186                                       _.ScalarMemOp:$src2),
2187               !strconcat(OpcodeStr,
2188                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2189                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2190               [(set _.KRC:$dst, (and _.KRCWM:$mask,
2191                                      (OpNode (_.VT _.RC:$src1),
2192                                        (X86VBroadcast
2193                                          (_.ScalarLdFrag addr:$src2)))))]>,
2194               EVEX_4V, EVEX_K, EVEX_B,
2195               Sched<[sched.Folded, ReadAfterLd]>;
2196}
2197
// Instantiate avx512_icmp_packed at all vector lengths: 512-bit under prd,
// 128/256-bit additionally under HasVLX.
2198multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
2199                                 X86SchedWriteWidths sched,
2200                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2201                                 bit IsCommutable = 0> {
2202  let Predicates = [prd] in
2203  defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.ZMM,
2204                              VTInfo.info512, IsCommutable>, EVEX_V512;
2205
2206  let Predicates = [prd, HasVLX] in {
2207    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.YMM,
2208                                   VTInfo.info256, IsCommutable>, EVEX_V256;
2209    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.XMM,
2210                                   VTInfo.info128, IsCommutable>, EVEX_V128;
2211  }
2212}
2213
// Same length-multiplexing as avx512_icmp_packed_vl, but for the variant
// that also carries the embedded-broadcast forms (32/64-bit elements).
2214multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2215                                     PatFrag OpNode, X86SchedWriteWidths sched,
2216                                     AVX512VLVectorVTInfo VTInfo,
2217                                     Predicate prd, bit IsCommutable = 0> {
2218  let Predicates = [prd] in
2219  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.ZMM,
2220                                  VTInfo.info512, IsCommutable>, EVEX_V512;
2221
2222  let Predicates = [prd, HasVLX] in {
2223    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.YMM,
2224                                       VTInfo.info256, IsCommutable>, EVEX_V256;
2225    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.XMM,
2226                                       VTInfo.info128, IsCommutable>, EVEX_V128;
2227  }
2228}
2229
2230// This fragment treats X86cmpm as commutable to help match loads in both
2231// operands for PCMPEQ.
2232def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
2233def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
2234                           (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
// GT is not symmetric, so X86pcmpgtm stays on the plain (non-commutative)
// setcc node; only a left-hand-register form will match.
2235def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2236                         (setcc node:$src1, node:$src2, SETGT)>;
2237
2238// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2239// increase the pattern complexity the way an immediate would.
2240let AddedComplexity = 2 in {
2241// FIXME: Is there a better scheduler class for VPCMP?
// Equality compares use the commutable fragment (IsCommutable = 1) so a
// load can fold from either operand; D/Q forms get broadcast (_rmb) forms,
// B/W forms (BWI-gated) do not.
2242defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c,
2243                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2244                EVEX_CD8<8, CD8VF>, VEX_WIG;
2245
2246defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c,
2247                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2248                EVEX_CD8<16, CD8VF>, VEX_WIG;
2249
2250defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c,
2251                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2252                EVEX_CD8<32, CD8VF>;
2253
2254defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c,
2255                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2256                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2257
// Signed greater-than compares: non-commutable fragment.
2258defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
2259                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2260                EVEX_CD8<8, CD8VF>, VEX_WIG;
2261
2262defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
2263                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2264                EVEX_CD8<16, CD8VF>, VEX_WIG;
2265
2266defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
2267                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2268                EVEX_CD8<32, CD8VF>;
2269
2270defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
2271                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2272                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2273}
2274
// Integer compare with the condition code carried as an explicit operand
// (VPCMP{B,W,D,Q}[U]).  Frag captures the SETCC condition as $cc so
// Frag.OperandTransform can turn it into the VPCMP immediate; CommFrag is
// the operand-swapped form used to fold a load from the *first* operand.
// Emits reg/reg (rri), reg/mem (rmi), their write-masked variants
// (rrik/rmik), assembler-only raw-immediate aliases (*_alt), and the
// commuted-load selection patterns.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag CommFrag, X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  // Register/register form; commutable so isel can canonicalize operands.
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                (_.VT _.RC:$src2),
                                                cond)))]>,
             EVEX_4V, Sched<[sched]>;
  // Register/memory form: load folded into the second operand.
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (_.KVT
                                (Frag:$cc
                                 (_.VT _.RC:$src1),
                                 (_.VT (bitconvert (_.LdFrag addr:$src2))),
                                 cond)))]>,
             EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  // Write-masked reg/reg form: the compare result is ANDed with $mask.
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                      AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                      (_.VT _.RC:$src2),
                                                      cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  // Write-masked reg/mem form.
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                    AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT
                                      (Frag:$cc
                                       (_.VT _.RC:$src1),
                                       (_.VT (bitconvert
                                              (_.LdFrag addr:$src2))),
                                       cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  // Assembler-only aliases carrying no patterns (hence hasSideEffects = 0);
  // they let "vpcmpb $imm, ..." syntax assemble and round-trip.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"), []>,
               EVEX_4V, Sched<[sched]>, NotMemoryFoldable;
    let mayLoad = 1 in
    def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                          "$dst, $src1, $src2, $cc}"), []>,
               EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
    def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"), []>,
               EVEX_4V, EVEX_K, Sched<[sched]>, NotMemoryFoldable;
    let mayLoad = 1 in
    def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, $src2, $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, $src2, $cc}"), []>,
               EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>,
               NotMemoryFoldable;
  }

  // Fold a load appearing in the FIRST operand: select the rmi form with
  // the operands swapped and the condition commuted via OperandTransform.
  def : Pat<(_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  // Same, for the write-masked form.
  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag:$cc (bitconvert (_.LdFrag addr:$src2)),
                                      (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}
2367
// Extends avx512_icmp_cc with broadcast-from-memory (EVEX_B) forms for
// element sizes that support embedded broadcast (D/Q): rmib, its masked
// variant rmibk, assembler-only raw-immediate aliases, and patterns that
// fold a broadcast load from the first operand via the commuted fragment.
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag CommFrag, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> :
           avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched, _, Name> {
  // Register/broadcast-memory form: one scalar load splat to all elements.
  def rmib : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                     AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
                        "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                       (_.VT _.RC:$src1),
                                       (X86VBroadcast
                                        (_.ScalarLdFrag addr:$src2)),
                                       cond)))]>,
             EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  // Write-masked broadcast form.
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
                       "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                       "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag:$cc
                                             (_.VT _.RC:$src1),
                                             (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2)),
                                             cond))))]>,
              EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  // Assembler-only, pattern-less aliases for the broadcast forms.
  let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
    def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                       u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                   "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
               EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
               NotMemoryFoldable;
    def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                  "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                  "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
               NotMemoryFoldable;
  }

  // Fold a broadcast load appearing in the FIRST operand by swapping the
  // operands and commuting the condition code.
  def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                    (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  // Same, for the write-masked broadcast form.
  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag:$cc (X86VBroadcast
                                       (_.ScalarLdFrag addr:$src2)),
                                      (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}
2431
// Instantiates avx512_icmp_cc across vector widths: the 512-bit form under
// the base predicate, and the 128/256-bit forms additionally gated on VLX.
multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
                             PatFrag CommFrag, X86SchedWriteWidths sched,
                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.ZMM,
                          VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.YMM,
                               VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.XMM,
                               VTInfo.info128, NAME>, EVEX_V128;
  }
}
2446
// Same width expansion as avx512_icmp_cc_vl, but for the broadcast-capable
// (rmb) compare multiclass used by the dword/qword element sizes.
multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
                                 PatFrag CommFrag, X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.ZMM,
                              VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.YMM,
                                    VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.XMM,
                                   VTInfo.info128, NAME>, EVEX_V128;
  }
}
2461
// Converts a SETCC condition code (operand 2 of the setcc node) into the
// corresponding VPCMP immediate encoding.
def X86pcmpm_imm : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;

// Swapped operand version of the above.
// Used when the selection pattern has commuted the compare operands (e.g.
// to fold a load), so the immediate must encode the swapped condition.
def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  uint8_t SSECC = X86::getVPCMPImmForCond(CC);
  SSECC = X86::getSwappedVPCMPImm(SSECC);
  return getI8Imm(SSECC, SDLoc(N));
}]>;
2475
// Signed integer compare: matches only setcc nodes whose condition is NOT
// an unsigned comparison; the xform produces the VPCMP immediate.
def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                       (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                               (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return !ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;

// Unsigned integer compare: matches only unsigned setcc conditions
// (selects the VPCMPU* instructions).
def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                        (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm>;

// Same as above, but commutes immediate. Use for load folding.
def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                (setcc node:$src1, node:$src2, node:$cc), [{
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  return ISD::isUnsignedIntSetCC(CC);
}], X86pcmpm_imm_commute>;
2501
// FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
//
// VPCMP{B,W,D,Q} (signed, opcode 0x3F/0x1F) and VPCMPU{B,W,D,Q} (unsigned,
// 0x3E/0x1E) with an explicit condition-code immediate.  B/W require BWI
// and have no broadcast forms; D/Q use the _rmb variants.  Word/qword
// element sizes are distinguished from byte/dword by VEX_W.
defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_commute,
                                SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                EVEX_CD8<8, CD8VF>;
defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_commute,
                                 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                                 EVEX_CD8<8, CD8VF>;

defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_commute,
                                SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                VEX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_commute,
                                 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                                 VEX_W, EVEX_CD8<16, CD8VF>;

defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_commute,
                                    SchedWriteVecALU, avx512vl_i32_info,
                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_commute,
                                     SchedWriteVecALU, avx512vl_i32_info,
                                     HasAVX512>, EVEX_CD8<32, CD8VF>;

defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_commute,
                                    SchedWriteVecALU, avx512vl_i64_info,
                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_commute,
                                     SchedWriteVecALU, avx512vl_i64_info,
                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2530
// Packed FP compare (VCMPPS/VCMPPD) writing a k-mask, with the condition
// code as an operand.  Emits reg/reg (rri), reg/mem (rmi) and
// reg/broadcast-mem (rmbi) forms via AVX512_maskable_cmp (which also
// creates the write-masked variants), assembler-only raw-immediate
// aliases, and patterns folding a load/broadcast in the first operand.
multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              string Name> {
  // Register/register form; the trailing 1 marks it commutable.
  defm  rri  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
                   "vcmp${cc}"#_.Suffix,
                   "$src2, $src1", "$src1, $src2",
                   (X86cmpm (_.VT _.RC:$src1),
                         (_.VT _.RC:$src2),
                           imm:$cc), 1>,
                   Sched<[sched]>;

  // Register/memory form.
  defm  rmi  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
                "vcmp${cc}"#_.Suffix,
                "$src2, $src1", "$src1, $src2",
                (X86cmpm (_.VT _.RC:$src1),
                        (_.VT (bitconvert (_.LdFrag addr:$src2))),
                        imm:$cc)>,
                Sched<[sched.Folded, ReadAfterLd]>;

  // Register/broadcast-memory form (EVEX_B).
  defm  rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                (outs _.KRC:$dst),
                (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
                "vcmp${cc}"#_.Suffix,
                "${src2}"##_.BroadcastStr##", $src1",
                "$src1, ${src2}"##_.BroadcastStr,
                (X86cmpm (_.VT _.RC:$src1),
                        (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                        imm:$cc)>,
                EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  // Accept explicit immediate argument form instead of comparison code.
  // Assembler-only, pattern-less aliases for "vcmpps $imm, ..." syntax.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm  rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, $src2, $src1", "$src1, $src2, $cc">,
                         Sched<[sched]>, NotMemoryFoldable;

    let mayLoad = 1 in {
      defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                             (outs _.KRC:$dst),
                             (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                             "vcmp"#_.Suffix,
                             "$cc, $src2, $src1", "$src1, $src2, $cc">,
                             Sched<[sched.Folded, ReadAfterLd]>,
                             NotMemoryFoldable;

      defm  rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, ${src2}"##_.BroadcastStr##", $src1",
                         "$src1, ${src2}"##_.BroadcastStr##", $cc">,
                         EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
                         NotMemoryFoldable;
    }
  }

  // Patterns for selecting with loads in other operand.
  // CommutableCMPCC restricts this to condition codes that are symmetric
  // under operand swap, so no immediate rewriting is needed.
  def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
                     CommutableCMPCC:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
                                                      imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
                                         (_.VT _.RC:$src1),
                                         CommutableCMPCC:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
                                                       _.RC:$src1, addr:$src2,
                                                       imm:$cc)>;

  def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                     (_.VT _.RC:$src1), CommutableCMPCC:$cc),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
                                                       imm:$cc)>;

  def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
                                          (_.ScalarLdFrag addr:$src2)),
                                         (_.VT _.RC:$src1),
                                         CommutableCMPCC:$cc)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
                                                        _.RC:$src1, addr:$src2,
                                                        imm:$cc)>;
}
2616
// Suppress-all-exceptions ({sae}) variant of the packed FP compare,
// register/register only (EVEX_B reinterpreted as SAE for reg forms).
multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // comparison code form (VCMP[EQ/LT/LE/...]
  defm  rrib  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                     "vcmp${cc}"#_.Suffix,
                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                     (X86cmpmRnd (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    imm:$cc,
                                (i32 FROUND_NO_EXC))>,
                     EVEX_B, Sched<[sched]>;

  // Assembler-only raw-immediate alias of the {sae} form.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm  rrib_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                         (outs _.KRC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                         "vcmp"#_.Suffix,
                         "$cc, {sae}, $src2, $src1",
                         "$src1, $src2, {sae}, $cc">,
                         EVEX_B, Sched<[sched]>, NotMemoryFoldable;
   }
}
2639
// Width expansion for VCMPPS/VCMPPD: 512-bit (plus the {sae} form, which
// only exists at 512 bits) under AVX512; 128/256-bit forms need VLX too.
multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
                avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;

  }
  let Predicates = [HasAVX512,HasVLX] in {
   defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
   defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
  }
}
2651
// Packed FP compares; PD is the VEX_W/64-bit-element form, PS the 32-bit.
defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
                          AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
                          AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2656
// Patterns to select fp compares with load as first operand.
// Valid only for CommutableCMPCC condition codes (symmetric under swap),
// so the operands can simply be exchanged to fold the load.
let Predicates = [HasAVX512] in {
  def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;

  def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
                            CommutableCMPCC:$cc)),
            (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
}
2667
2668// ----------------------------------------------------------------
2669// FPClass
2670//handle fpclass instruction  mask =  op(reg_scalar,imm)
2671//                                    op(mem_scalar,imm)
2672multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2673                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2674                                 Predicate prd> {
2675  let Predicates = [prd], ExeDomain = _.ExeDomain in {
2676      def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2677                      (ins _.RC:$src1, i32u8imm:$src2),
2678                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2679                      [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2680                              (i32 imm:$src2)))]>,
2681                      Sched<[sched]>;
2682      def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2683                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2684                      OpcodeStr##_.Suffix#
2685                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2686                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
2687                                      (OpNode (_.VT _.RC:$src1),
2688                                      (i32 imm:$src2))))]>,
2689                      EVEX_K, Sched<[sched]>;
2690    def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2691                    (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2692                    OpcodeStr##_.Suffix##
2693                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2694                    [(set _.KRC:$dst,
2695                          (OpNode _.ScalarIntMemCPat:$src1,
2696                                  (i32 imm:$src2)))]>,
2697                    Sched<[sched.Folded, ReadAfterLd]>;
2698    def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2699                    (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2700                    OpcodeStr##_.Suffix##
2701                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2702                    [(set _.KRC:$dst,(and _.KRCWM:$mask,
2703                        (OpNode _.ScalarIntMemCPat:$src1,
2704                            (i32 imm:$src2))))]>,
2705                    EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
2706  }
2707}
2708
//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
//                                  fpclass(reg_vec, mem_vec, imm)
//                                  fpclass(reg_vec, broadcast(eltVt), imm)
// Vector VFPCLASSPS/PD for one width.  `mem` and `broadcast` are strings
// appended to the mnemonic for the memory and broadcast forms (used to
// disambiguate operand size in assembly).  Masked forms AND with $mask.
multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                 string mem, string broadcast>{
  let ExeDomain = _.ExeDomain in {
  def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
                                       (i32 imm:$src2)))]>,
                      Sched<[sched]>;
  def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
                      (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix#
                      "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                      [(set _.KRC:$dst,(and _.KRCWM:$mask,
                                       (OpNode (_.VT _.RC:$src1),
                                       (i32 imm:$src2))))]>,
                      EVEX_K, Sched<[sched]>;
  def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##mem#
                    "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set _.KRC:$dst,(OpNode
                                     (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                     (i32 imm:$src2)))]>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##mem#
                    "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
                    [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
                                  (_.VT (bitconvert (_.LdFrag addr:$src1))),
                                  (i32 imm:$src2))))]>,
                    EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
  // Broadcast forms: classify a single scalar splat to every lane (EVEX_B).
  def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                                      _.BroadcastStr##", $dst|$dst, ${src1}"
                                                  ##_.BroadcastStr##", $src2}",
                    [(set _.KRC:$dst,(OpNode
                                     (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src1))),
                                     (i32 imm:$src2)))]>,
                    EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
                    (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
                          _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
                                                   _.BroadcastStr##", $src2}",
                    [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
                                     (_.VT (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src1))),
                                     (i32 imm:$src2))))]>,
                    EVEX_B, EVEX_K,  Sched<[sched.Folded, ReadAfterLd]>;
  }
}
2768
// Width expansion for the vector fpclass forms.  The "{z}"/"{x}"/"{y}"
// strings become the memory-form mnemonic suffix identifying the vector
// width; 128/256-bit variants additionally require VLX.
multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
                                     bits<8> opc, SDNode OpNode,
                                     X86SchedWriteWidths sched, Predicate prd,
                                     string broadcast>{
  let Predicates = [prd] in {
    defm Z    : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, "{z}", broadcast>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.XMM,
                                      _.info128, "{x}", broadcast>, EVEX_V128;
    defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.YMM,
                                      _.info256, "{y}", broadcast>, EVEX_V256;
  }
}
2784
// Instantiates the full fpclass family: packed PS/PD (with "{l}"/"{q}"
// broadcast suffix strings) plus the scalar SS/SD forms.  PD/SD carry
// VEX_W for 64-bit elements.
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
                                 bits<8> opcScalar, SDNode VecOpNode,
                                 SDNode ScalarOpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm PS : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f32_info, opcVec,
                                      VecOpNode, sched, prd, "{l}">,
                                      EVEX_CD8<32, CD8VF>;
  defm PD : avx512_vector_fpclass_all<OpcodeStr,  avx512vl_f64_info, opcVec,
                                      VecOpNode, sched, prd, "{q}">,
                                      EVEX_CD8<64, CD8VF> , VEX_W;
  defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                   sched.Scl, f32x_info, prd>,
                                   EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
                                   sched.Scl, f64x_info, prd>,
                                   EVEX_CD8<64, CD8VT1>, VEX_W;
}
2802
// VFPCLASS{PS,PD,SS,SD}: vector opcode 0x66, scalar 0x67; requires DQI.
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
                                      X86Vfpclasss, SchedWriteFCmp, HasDQI>,
                                      AVX512AIi8Base, EVEX;
2806
2807//-----------------------------------------------------------------
2808// Mask register copy, including
2809// - copy between mask registers
2810// - load/store mask registers
2811// - copy from GPR to mask register and vice versa
2812//
2813multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2814                         string OpcodeStr, RegisterClass KRC,
2815                         ValueType vvt, X86MemOperand x86memop> {
2816  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2817  def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2818             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2819             Sched<[WriteMove]>;
2820  def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2821             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2822             [(set KRC:$dst, (vvt (load addr:$src)))]>,
2823             Sched<[WriteLoad]>;
2824  def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2825             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2826             [(store KRC:$src, addr:$dst)]>,
2827             Sched<[WriteStore]>;
2828}
2829
// KMOV between a mask register and a general-purpose register:
// kr (GPR -> mask) and rk (mask -> GPR).  No patterns; selected elsewhere.
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
                             string OpcodeStr,
                             RegisterClass KRC, RegisterClass GRC> {
  let hasSideEffects = 0 in {
    def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
    def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
               Sched<[WriteMove]>;
  }
}
2842
// Instantiate KMOVB/W/D/Q. Note the feature split: KMOVB requires DQI,
// KMOVW is baseline AVX512F, KMOVD/KMOVQ require BWI. The GPR forms of
// KMOVD/KMOVQ use a different prefix (XD) than the mask/memory forms.
let Predicates = [HasDQI] in
  defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
               VEX, PD;

let Predicates = [HasAVX512] in
  defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
               avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
               VEX, PS;

let Predicates = [HasBWI] in {
  defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
               VEX, PD, VEX_W;
  defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
               VEX, XD;
  defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
               VEX, PS, VEX_W;
  defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
               VEX, XD, VEX_W;
}
2863
// GR from/to mask register
// i8/i16 <-> v8i1/v16i1 go through a 32-bit GPR (INSERT_SUBREG/EXTRACT_SUBREG)
// because there is no 8/16-bit GPR<->mask move; i32/i64 map directly via
// register-class copies.
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;

def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;

// zext of a mask->GPR move uses the KMOV*rk instruction (which zero-extends
// into the 32-bit GPR); anyext can be a plain register-class copy.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
          (KMOVWrk VK16:$src)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
          (COPY_TO_REGCLASS VK16:$src, GR32)>;

// KMOVBrk only exists with DQI.
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
          (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
          (COPY_TO_REGCLASS VK8:$src, GR32)>;

def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
          (COPY_TO_REGCLASS GR32:$src, VK32)>;
def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
          (COPY_TO_REGCLASS VK32:$src, GR32)>;
def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
          (COPY_TO_REGCLASS GR64:$src, VK64)>;
def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
          (COPY_TO_REGCLASS VK64:$src, GR64)>;
2893
// Load/store kreg
// Sub-byte mask types are widened to a byte access via KMOVB (DQI).
let Predicates = [HasDQI] in {
  def : Pat<(store VK1:$src, addr:$dst),
            (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;

  def : Pat<(v1i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
  def : Pat<(v2i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
  def : Pat<(v4i1 (load addr:$src)),
            (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
}

// Without DQI there is no byte KMOV, so a v8i1 load goes through a
// zero-extending GPR byte load and a GPR->mask copy.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
}
2911
let Predicates = [HasAVX512] in {
  // Lower scalar_to_vector of a GPR into a mask register as a plain
  // register-class copy (GR8 is first widened to GR32 via INSERT_SUBREG).
  multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
    def : Pat<(maskVT (scalar_to_vector GR32:$src)),
              (COPY_TO_REGCLASS GR32:$src, maskRC)>;

    def : Pat<(maskVT (scalar_to_vector GR8:$src)),
              (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
  }

  defm : operation_gpr_mask_copy_lowering<VK1,  v1i1>;
  defm : operation_gpr_mask_copy_lowering<VK2,  v2i1>;
  defm : operation_gpr_mask_copy_lowering<VK4,  v4i1>;
  defm : operation_gpr_mask_copy_lowering<VK8,  v8i1>;
  defm : operation_gpr_mask_copy_lowering<VK16,  v16i1>;
  defm : operation_gpr_mask_copy_lowering<VK32,  v32i1>;
  defm : operation_gpr_mask_copy_lowering<VK64,  v64i1>;

  // Inserting a single GPR-sourced bit into a zero vector: mask the GPR to
  // one bit (AND ...,1), move it into a mask register with KMOVW.
  def : Pat<(insert_subvector (v16i1 immAllZerosV),
                              (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
            (COPY_TO_REGCLASS
             (KMOVWkr (AND32ri8
                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
                       (i32 1))), VK16)>;
}
2936
// Mask unary operation
// - KNOT
// One reg-reg unary mask instruction for a single mask width, guarded by
// the given predicate.
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
                            X86FoldableSchedWrite sched, Predicate prd> {
  let Predicates = [prd] in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
               [(set KRC:$dst, (OpNode KRC:$src))]>,
               Sched<[sched]>;
}
2948
// Instantiate the B/W/D/Q widths of a unary mask op with their required
// features (B needs DQI, W is baseline, D/Q need BWI) and encodings.
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNode,
                                X86FoldableSchedWrite sched> {
  defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                            sched, HasDQI>, VEX, PD;
  defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                            sched, HasAVX512>, VEX, PS;
  defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                            sched, HasBWI>, VEX, PD, VEX_W;
  defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                            sched, HasBWI>, VEX, PS, VEX_W;
}
2961
// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;

// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;

// Sub-byte masks always use the 16-bit KNOTW via promotion.
def : Pat<(vnot VK4:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
def : Pat<(vnot VK2:$src),
          (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2974
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
// One reg-reg-reg binary mask instruction for a single mask width.
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                           RegisterClass KRC, SDPatternOperator OpNode,
                           X86FoldableSchedWrite sched, Predicate prd,
                           bit IsCommutable> {
  let Predicates = [prd], isCommutable = IsCommutable in
    def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
               Sched<[sched]>;
}
2988
// Instantiate the B/W/D/Q widths of a binary mask op. prdW lets the W form
// require a stronger feature than the default HasAVX512 (used by KADD/DQI).
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
                                 SDPatternOperator OpNode,
                                 X86FoldableSchedWrite sched, bit IsCommutable,
                                 Predicate prdW = HasAVX512> {
  defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                             sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
  defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                             sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
  defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
  defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                             sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}
3002
// Scalar (i1) forms of and-not / xnor.
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
// These nodes use 'vnot' instead of 'not' to support vectors.
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;

// TODO - do we need a X86SchedWriteWidths::KMASK type?
defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XMM, 1>;
defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3016
// Lower a binary mask op on a narrow mask type (VK1/VK2/VK4, and VK8 when
// DQI is unavailable) by promoting both operands to VK16, running the
// 16-bit instruction, and copying the result back to the narrow class.
//
// VOpNode is the vector form of the operator, OpNode the scalar (i1) form;
// Inst is the 16-bit K-instruction to use (e.g. KANDWrr).
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(OpNode VK1:$src1, VK1:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK1:$src1, VK16),
                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  // NOTE: the result must be copied back to the operand's own register class
  // (VK2/VK4); the original code copied both to VK1 (copy-paste error).
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK2:$src1, VK16),
                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK4:$src1, VK16),
                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
3041
// Instantiate the narrow-mask lowering patterns for each logical op,
// all mapping onto the 16-bit K-instructions.
defm : avx512_binop_pat<and,   and,  KANDWrr>;
defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
defm : avx512_binop_pat<or,    or,   KORWrr>;
defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
3047
// Mask unpacking
// KUNPCK concatenates two half-width masks into one full-width mask.
multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
                             RegisterClass KRCSrc, X86FoldableSchedWrite sched,
                             Predicate prd> {
  let Predicates = [prd] in {
    let hasSideEffects = 0 in
    def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
               (ins KRC:$src1, KRC:$src2),
               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               VEX_4V, VEX_L, Sched<[sched]>;

    // concat_vectors puts $src1 in the low elements, but KUNPCK places its
    // first source operand in the upper half, so the operands are swapped.
    def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
              (!cast<Instruction>(NAME##rr)
                        (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
                        (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
  }
}

defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, WriteShuffle, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, WriteShuffle, HasBWI>, PS;
defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, WriteShuffle, HasBWI>, PS, VEX_W;
3069
// Mask bit testing
// KORTEST/KTEST: compare two masks and set EFLAGS; no mask result is written.
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode, X86FoldableSchedWrite sched,
                              Predicate prd> {
  let Predicates = [prd], Defs = [EFLAGS] in
    def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
               Sched<[sched]>;
}
3080
// Instantiate the B/W/Q/D widths of a mask test op. As with the binops,
// prdW lets the W form require a stronger feature than HasAVX512.
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                Predicate prdW = HasAVX512> {
  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
                                                                VEX, PD;
  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
                                                                VEX, PS;
  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
                                                                VEX, PS, VEX_W;
  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
                                                                VEX, PD, VEX_W;
}
3093
// TODO - do we need a X86SchedWriteWidths::KMASK type?
// KORTEST is baseline AVX512F for the W form; KTEST requires DQI even for W.
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3097
// Mask shift
// KSHIFTL/KSHIFTR with an 8-bit immediate shift amount.
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                               SDNode OpNode, X86FoldableSchedWrite sched> {
  let Predicates = [HasAVX512] in
    def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
                 !strconcat(OpcodeStr,
                            "\t{$imm, $src, $dst|$dst, $src, $imm}"),
                            [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>,
                 Sched<[sched]>;
}
3108
// Instantiate mask shifts: W/B share opcode opc1, Q/D share opc2; B requires
// DQI and Q/D require BWI (the predicates here further restrict the
// HasAVX512 predicate set inside avx512_mask_shiftop).
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
                                 SDNode OpNode, X86FoldableSchedWrite sched> {
  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  let Predicates = [HasDQI] in
  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
                               sched>, VEX, TAPD;
  let Predicates = [HasBWI] in {
  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
                               sched>, VEX, TAPD, VEX_W;
  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
                               sched>, VEX, TAPD;
  }
}
3123
// Mask shift-left / shift-right by immediate.
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3126
// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Without VLX, narrow compares are widened: operands are inserted into
// undef 512-bit registers, the 512-bit compare runs, and the wide mask is
// copied back to the narrow mask class (the upper mask bits are don't-care
// to the narrow consumers).
multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
                                              X86VectorVTInfo Narrow,
                                              X86VectorVTInfo Wide> {
  // Unmasked compare.
  def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
                              (Narrow.VT Narrow.RC:$src2))),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrr")
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
           Narrow.KRC)>;

  // Compare merged with a pre-existing mask (selected to the "k" form).
  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                             (Frag (Narrow.VT Narrow.RC:$src1),
                                   (Narrow.VT Narrow.RC:$src2)))),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr#"Zrrk")
            (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
           Narrow.KRC)>;
}
3149
// Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Same widening trick as above, but for the VPCMP forms that carry an
// explicit condition code; Frag.OperandTransform maps the matched CondCode
// onto the instruction's immediate.
multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag,
                                                 string InstStr,
                                                 X86VectorVTInfo Narrow,
                                                 X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2), cond)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr##Zrri)
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            (Frag.OperandTransform $cc)), Narrow.KRC)>;

// Masked variant, selected to the "k" form of the instruction.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
                                                 (Narrow.VT Narrow.RC:$src2),
                                                 cond)))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           (Frag.OperandTransform $cc)), Narrow.KRC)>;
}
3173
// Same as above, but for fp types which don't use PatFrags.
// The FP compare node already carries the imm8 condition, so it is passed
// straight through instead of going through OperandTransform.
multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
                                                X86VectorVTInfo Narrow,
                                                X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
                              (Narrow.VT Narrow.RC:$src2), imm:$cc)),
          (COPY_TO_REGCLASS
           (!cast<Instruction>(InstStr##Zrri)
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
            (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
            imm:$cc), Narrow.KRC)>;

// Masked variant.
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
                           (OpNode (Narrow.VT Narrow.RC:$src1),
                                   (Narrow.VT Narrow.RC:$src2), imm:$cc))),
          (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
           (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
           (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
           imm:$cc), Narrow.KRC)>;
}
3195
// Widen 128/256-bit dword/qword integer and float compares to 512 bits when
// VLX is unavailable.
let Predicates = [HasAVX512, NoVLX] in {
  // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
  // increase the pattern complexity the way an immediate would.
  let AddedComplexity = 2 in {
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v2i64x_info, v8i64_info>;
  }

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v8i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v8i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v4i32x_info, v16i32_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v4i32x_info, v16i32_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v4i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v4i64x_info, v8i64_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v2i64x_info, v8i64_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v2i64x_info, v8i64_info>;

  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v8f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v4f32x_info, v16f32_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v4f64x_info, v8f64_info>;
  defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v2f64x_info, v8f64_info>;
}
3230
// Same widening for byte/word compares, which need BWI for the 512-bit ops.
let Predicates = [HasBWI, NoVLX] in {
  // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
  // increase the pattern complexity the way an immediate would.
  let AddedComplexity = 2 in {
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v8i16x_info, v32i16_info>;
  }

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v32i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v32i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v16i8x_info, v64i8_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v16i8x_info, v64i8_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v16i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v16i16x_info, v32i16_info>;

  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v8i16x_info, v32i16_info>;
  defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v8i16x_info, v32i16_info>;
}
3260
// Mask setting all 0s or 1s
// Pseudo-instruction materializing an all-zeros or all-ones mask; expanded
// after selection (rematerializable, as cheap as a move).
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
  let Predicates = [HasAVX512] in
    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
        SchedRW = [WriteZero] in
      def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                     [(set KRC:$dst, (VT Val))]>;
}
3269
// Instantiate the W/D/Q widths of the set-all pseudo (v8i1 and smaller are
// handled by the promotion patterns below).
multiclass avx512_mask_setop_w<PatFrag Val> {
  defm W : avx512_mask_setop<VK16, v16i1, Val>;
  defm D : avx512_mask_setop<VK32,  v32i1, Val>;
  defm Q : avx512_mask_setop<VK64, v64i1, Val>;
}

defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3278
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Narrow all-zeros/all-ones masks reuse the 16-bit KSET0W/KSET1W pseudo and
// copy the result to the narrow class.
let Predicates = [HasAVX512] in {
  def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
  def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
  def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
  def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
  def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
  def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
  def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
  def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
}
3290
// Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions are free: since mask registers alias across widths, an
// index-0 insert/extract is just a register-class copy.
multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
                                             RegisterClass RC, ValueType VT> {
  def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
            (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;

  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
            (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
}
// Instantiate index-0 subvector copies for every (narrow, wide) mask pair.
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3326
3327//===----------------------------------------------------------------------===//
3328// AVX-512 - Aligned and unaligned load and store
3329//
3330
// Defines one EVEX vector load/move (e.g. VMOVAPS/VMOVDQU) at a single
// vector width: rr, rrk/rrkz (register select with merge/zero masking),
// rm, rmk/rmkz (load with merge/zero masking), plus masked_load patterns.
// NoRMPattern suppresses the unmasked-load pattern (used when another
// instruction already covers it); SelectOprr customizes the select node
// matched by the register forms; EVEX2VEXOvrd names the VEX instruction the
// EVEX->VEX pass may compress to.
multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
                       X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
                       X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                       bit NoRMPattern = 0,
                       SDPatternOperator SelectOprr = vselect> {
  let hasSideEffects = 0 in {
  // Plain register move; no pattern.
  let isMoveReg = 1 in
  def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
                    _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  // Zero-masked register form: select(mask, src, 0).
  def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.KRCWM:$mask,  _.RC:$src),
                      !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
                       "${dst} {${mask}} {z}, $src}"),
                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                           (_.VT _.RC:$src),
                                           _.ImmAllZerosV)))], _.ExeDomain>,
                       EVEX, EVEX_KZ, Sched<[Sched.RR]>;

  // Unmasked load; rematerializable and foldable.
  let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoRMPattern, [],
                        [(set _.RC:$dst,
                          (_.VT (bitconvert (ld_frag addr:$src))))]),
                    _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;

  // Merge-masked forms tie the pass-through value $src0 to $dst.
  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
    def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                      (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                      [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
                                          (_.VT _.RC:$src1),
                                          (_.VT _.RC:$src0))))], _.ExeDomain>,
                       EVEX, EVEX_K, Sched<[Sched.RR]>;
    def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
                      "${dst} {${mask}}, $src1}"),
                     [(set _.RC:$dst, (_.VT
                         (vselect _.KRCWM:$mask,
                          (_.VT (bitconvert (ld_frag addr:$src1))),
                           (_.VT _.RC:$src0))))], _.ExeDomain>,
                     EVEX, EVEX_K, Sched<[Sched.RM]>;
  }
  // Zero-masked load.
  def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.KRCWM:$mask, _.MemOp:$src),
                  OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
                                "${dst} {${mask}} {z}, $src}",
                  [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
                    (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
                  _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
  }
  // masked_load with undef or zero pass-through selects the zero-masked
  // form; with a register pass-through it selects the merge-masked form.
  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;

  def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
             _.KRCWM:$mask, addr:$ptr)>;
}
3397
// Instantiate the aligned-load variants at 512 bits (requires prd) and at
// 256/128 bits (additionally requires VLX), using the aligned load
// fragments and the aligned masked_load nodes.
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo _, Predicate prd,
                                 X86SchedWriteMoveLSWidths Sched,
                                 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
                       _.info512.AlignedLdFrag, masked_load_aligned512,
                       Sched.ZMM, "", NoRMPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
                          _.info256.AlignedLdFrag, masked_load_aligned256,
                          Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
                          _.info128.AlignedLdFrag, masked_load_aligned128,
                          Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
  }
}
3416
// Instantiate the unaligned-load variants at 512/256/128 bits; same feature
// structure as avx512_alignedload_vl but with the plain LdFrag and
// masked_load_unaligned, and a customizable select node.
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
                          AVX512VLVectorVTInfo _, Predicate prd,
                          X86SchedWriteMoveLSWidths Sched,
                          string EVEX2VEXOvrd, bit NoRMPattern = 0,
                          SDPatternOperator SelectOprr = vselect> {
  let Predicates = [prd] in
  defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
                       masked_load_unaligned, Sched.ZMM, "",
                       NoRMPattern, SelectOprr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
  defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
                         masked_load_unaligned, Sched.YMM, EVEX2VEXOvrd#"Y",
                         NoRMPattern, SelectOprr>, EVEX_V256;
  defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
                         masked_load_unaligned, Sched.XMM, EVEX2VEXOvrd,
                         NoRMPattern, SelectOprr>, EVEX_V128;
  }
}
3436
// One store-form move instruction for a single vector width:
//   rr_REV/rrk_REV/rrkz_REV - store-opcode (MRMDestReg) register-to-register
//     encodings, kept only for disassembly and for re-encoding via
//     FoldGenData (which ties them to the load-form rr/rrk/rrkz variants);
//     they carry no patterns and are isCodeGenOnly.
//   mr/mrk  - the real memory stores (plain and merge-masked).
// st_frag selects aligned vs unaligned store; mstore is the matching masked
// store node. NoMRPattern suppresses the unmasked store pattern (used where
// another instruction already covers it).
multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
                        X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
                        bit NoMRPattern = 0> {
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  let isMoveReg = 1 in
  def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
                         OpcodeStr # "\t{$src, $dst|$dst, $src}",
                         [], _.ExeDomain>, EVEX,
                         FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
                         EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
  def rrk_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                         (ins _.KRCWM:$mask, _.RC:$src),
                         OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
                         "${dst} {${mask}}, $src}",
                         [], _.ExeDomain>,  EVEX, EVEX_K,
                         FoldGenData<BaseName#_.ZSuffix#rrk>,
                         Sched<[Sched.RR]>;
  def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs  _.RC:$dst),
                          (ins _.KRCWM:$mask, _.RC:$src),
                          OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
                          "${dst} {${mask}} {z}, $src}",
                          [], _.ExeDomain>, EVEX, EVEX_KZ,
                          FoldGenData<BaseName#_.ZSuffix#rrkz>,
                          Sched<[Sched.RR]>;
  }

  // Unmasked memory store. The pattern is omitted when NoMRPattern is set.
  let hasSideEffects = 0, mayStore = 1 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    !if(NoMRPattern, [],
                        [(st_frag (_.VT _.RC:$src), addr:$dst)]),
                    _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
                    EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
  // Merge-masked store; selected only through the mstore pattern below.
  def mrk : AVX512PI<opc, MRMDestMem, (outs),
                     (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
               NotMemoryFoldable;

  def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
           (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
                                                        _.KRCWM:$mask, _.RC:$src)>;

  // "<mnemonic>.s" assembler aliases force the store-opcode (reversed)
  // register encodings.
  def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
                   _.RC:$dst, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
  def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
                  (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
                   _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
}
3491
// Instantiates avx512_store across the three vector widths with the plain
// (unaligned) 'store' fragment and masked_store_unaligned. 512-bit form is
// gated on 'prd'; 256/128-bit forms additionally require HasVLX.
multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
                            AVX512VLVectorVTInfo _, Predicate prd,
                            X86SchedWriteMoveLSWidths Sched,
                            string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
                        masked_store_unaligned, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
                             masked_store_unaligned, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
                             masked_store_unaligned, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}
3509
// Aligned counterpart of avx512_store_vl: uses the 'alignedstore' fragment
// and the per-width aligned masked-store nodes. Same predicate structure:
// 512-bit under 'prd', 256/128-bit additionally under HasVLX.
multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _, Predicate prd,
                                  X86SchedWriteMoveLSWidths Sched,
                                  string EVEX2VEXOvrd, bit NoMRPattern = 0> {
  let Predicates = [prd] in
  defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
                        masked_store_aligned512, Sched.ZMM, "",
                        NoMRPattern>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
                             masked_store_aligned256, Sched.YMM,
                             EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
    defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
                             masked_store_aligned128, Sched.XMM, EVEX2VEXOvrd,
                             NoMRPattern>, EVEX_V128;
  }
}
3528
// Floating-point moves: aligned (VMOVAPS/VMOVAPD, opcodes 0x28/0x29) and
// unaligned (VMOVUPS/VMOVUPD, opcodes 0x10/0x11). The unaligned loads pass
// null_frag as SelectOprr so their register forms carry no masked-select
// patterns (the aligned variants already cover register-to-register cases).
defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
               PS, EVEX_CD8<32, CD8VF>;

defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
                                     HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
               PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
               avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPS">,
                               PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
                              SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
               avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
                               SchedWriteFMoveLS, "VMOVUPD">,
                               PD, VEX_W, EVEX_CD8<64, CD8VF>;

// Integer moves: aligned (VMOVDQA32/64, 0x6F/0x7F) and unaligned
// (VMOVDQU8/16/32/64). The byte/word variants require BWI. Several pass
// NoRMPattern/NoMRPattern = 1 because the plain load/store patterns are
// provided elsewhere (see the explicit Pat blocks after the _NOVLX pseudos).
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA", 1>,
                 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA", 1>,
                 PD, EVEX_CD8<32, CD8VF>;

defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
                                       HasAVX512, SchedWriteVecMoveLS,
                                       "VMOVDQA">,
                 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
                                        HasAVX512, SchedWriteVecMoveLS,
                                        "VMOVDQA">,
                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                               SchedWriteVecMoveLS, "VMOVDQU", 1>,
                avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
                 XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
                 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
                                 SchedWriteVecMoveLS, "VMOVDQU">,
                 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3592
// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
// expandPostRAPseudos.
// Load pseudos: 128/256-bit aligned and unaligned FP loads that will be
// widened to a ZMM access after register allocation.
let isReMaterializable = 1, canFoldAsLoad = 1,
    isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
                            "", []>, Sched<[WriteFLoadX]>;
def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
                            "", []>, Sched<[WriteFLoadY]>;
}

// Matching store pseudos for the spill (store) direction.
let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
                            "", []>, Sched<[WriteFStoreX]>;
def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
                            "", []>, Sched<[WriteFStoreY]>;
}
3618
// vselect with the zero vector in the "true" position: invert the mask with
// KNOT and use a zero-masked register move, so the selected lanes become
// zero and the others keep $src. The v8i64 case round-trips the mask through
// VK16 because KNOT is only available at 16-bit mask width (KNOTWrr).
def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
                                              VK8), VR512:$src)>;

def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3627
// These patterns exist to prevent the above patterns from introducing a second
// mask inversion when one already exists: a vselect whose mask is already an
// xor-with-all-ones can feed the (not-inverted) mask straight into the
// zero-masking move.
def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
                          (bc_v8i64 (v16i32 immAllZerosV)),
                          (v8i64 VR512:$src))),
                 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
// Output operand uses VK16 (the class the input matched), mirroring the VK8
// pattern above; previously it named VK16WM, inconsistent with its input.
def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                           (v16i32 immAllZerosV),
                           (v16i32 VR512:$src))),
                  (VMOVDQA32Zrrkz VK16:$mask, VR512:$src)>;
3638
// Lowers a masked select on a narrow (128/256-bit) vector when VLX is not
// available: widen both vector operands into a 512-bit register via
// INSERT_SUBREG, copy the narrow mask into the wide mask class, perform the
// 512-bit masked move, then extract the narrow result back out. One pattern
// for the merge form (rrk, $src0 as pass-through) and one for the zeroing
// form (rrkz).
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
                              X86VectorVTInfo Wide> {
 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.RC:$src0)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrk")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;

 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
                               Narrow.RC:$src1, Narrow.ImmAllZerosV)),
           (EXTRACT_SUBREG
            (Wide.VT
             (!cast<Instruction>(InstrStr#"rrkz")
              (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
            Narrow.SubRegIdx)>;
}
3660
// Patterns for handling masked selects of 128/256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract. Element types needing BWI
// (i8/i16) are listed separately under the HasBWI predicate.
let Predicates = [HasAVX512, NoVLX] in {
  defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
  defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
  defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;

  defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
  defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
  defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
}

let Predicates = [HasBWI, NoVLX] in {
  defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
  defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;

  defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
  defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
}
3682
let Predicates = [HasAVX512] in {
  // 512-bit store.
  // All integer element widths are stored with the 64-bit-element flavor
  // (VMOVDQA64Z/VMOVDQU64Z); the VMOVDQA32/DQU8/DQU16/DQU32 multiclasses were
  // instantiated with NoMRPattern above, so these are the only integer-store
  // patterns at this width.
  def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
            (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v16i32 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v32i16 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
  def : Pat<(store (v64i8 VR512:$src), addr:$dst),
            (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
}
3698
let Predicates = [HasVLX] in {
  // 128-bit store.
  // As with the 512-bit patterns above, every integer element width maps to
  // the 64-bit-element move (VMOVDQA64Z128 / VMOVDQU64Z128).
  def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
            (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;

  // 256-bit store.
  def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
            (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
3728
// Folds "masked select of (bitcast (extract_subvector X, 0))" into a single
// masked move on the low subregister of X:
//   From - the wide source vector type being extracted from.
//   To   - the narrow extracted type (also the type of $src0 / the result
//          register, reached via To.SubRegIdx).
//   Cast - the element layout of the move instruction actually used; the
//          vselect and its mask operate in this type.
// Merge (rrk) and zeroing (rrkz) forms are both handled.
multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
                                   X86VectorVTInfo To, X86VectorVTInfo Cast> {
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (extract_subvector
                                       (From.VT From.RC:$src), (iPTR 0)))),
                              To.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask,
                      (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;

  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (extract_subvector
                                       (From.VT From.RC:$src), (iPTR 0)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask,
                      (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;
}
3749
3750
let Predicates = [HasVLX] in {
// A masked extract from the first 128-bits of a 256-bit vector can be
// implemented with masked move.
// Each (From, To) pair is instantiated with both a 64-bit-element and a
// 32-bit-element Cast so either mask granularity folds.
defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info,  v2i64x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info,  v4i32x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info,  v16i8x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info,  v2i64x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info,  v4i32x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info,  v16i8x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v4f64x_info,  v2f64x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v8f32x_info,  v4f32x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v4f64x_info,  v2f64x_info, v4f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v8f32x_info,  v4f32x_info, v4f32x_info>;

// A masked extract from the first 128-bits of a 512-bit vector can be
// implemented with masked move.
defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info,  v2i64x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info,  v16i8x_info, v2i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info,  v2i64x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info,  v16i8x_info, v4i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v8f64_info,  v2f64x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ128",   v16f32_info, v4f32x_info, v2f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v8f64_info,  v2f64x_info, v4f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ128",   v16f32_info, v4f32x_info, v4f32x_info>;

// A masked extract from the first 256-bits of a 512-bit vector can be
// implemented with masked move.
defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info,  v4i64x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info,  v32i8x_info,  v4i64x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info,  v4i64x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info,  v32i8x_info,  v8i32x_info>;
defm : masked_move_for_extract<"VMOVAPDZ256",   v8f64_info,  v4f64x_info,  v4f64x_info>;
defm : masked_move_for_extract<"VMOVAPDZ256",   v16f32_info, v8f32x_info,  v4f64x_info>;
defm : masked_move_for_extract<"VMOVAPSZ256",   v8f64_info,  v4f64x_info,  v8f32x_info>;
defm : masked_move_for_extract<"VMOVAPSZ256",   v16f32_info, v8f32x_info,  v8f32x_info>;
}
3797
// Move Int Doubleword to Packed Double Int
//
let ExeDomain = SSEPackedInt in {
// vmovd: GR32 -> low element of an XMM register (scalar_to_vector).
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector GR32:$src)))]>,
                        EVEX, Sched<[WriteVecMoveFromGpr]>;
// vmovd: i32 load -> low element of an XMM register.
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// vmovq: GR64 -> low element of an XMM register (REX.W form of 0x6E).
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                        [(set VR128X:$dst,
                          (v2i64 (scalar_to_vector GR64:$src)))]>,
                      EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Memory form kept only for disassembly (no pattern).
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>,
                      EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// CodeGen-only GR64 <-> FR64X bitcast moves sharing the vmovq mnemonic.
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
                       "vmovq\t{$src, $dst|$dst, $src}",
                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64X:$src))]>,
                         EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
                         "vmovq\t{$src, $dst|$dst, $src}",
                         [(store (i64 (bitconvert FR64X:$src)), addr:$dst)]>,
                         EVEX, VEX_W, Sched<[WriteVecStore]>,
                         EVEX_CD8<64, CD8VT1>;
}
} // ExeDomain = SSEPackedInt
3841
// Move Int Doubleword to Single Scalar
//
// CodeGen-only GR32 / i32-load -> FR32X bitcast moves under the vmovd
// mnemonic.
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert GR32:$src))]>,
                      EVEX, Sched<[WriteVecMoveFromGpr]>;

def VMOVDI2SSZrm  : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3855
// Move doubleword from xmm register to r/m32
//
// vmovd in the store direction: extract element 0 of a v4i32 into GR32 or
// directly to memory.
let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
                                        (iPTR 0)))]>,
                       EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                       (ins i32mem:$dst, VR128X:$src),
                       "vmovd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (extractelt (v4i32 VR128X:$src),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt
3871
// Move quadword from xmm1 register to r/m64
//
let ExeDomain = SSEPackedInt in {
// vmovq (0x7E store direction): extract element 0 of v2i64 into GR64.
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
                                                   (iPTR 0)))]>,
                      PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
                      Requires<[HasAVX512]>;

// 0x7E memory form kept for disassembly only; the 0xD6 encoding below is
// the one with a pattern.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
                      EVEX, VEX_W, Sched<[WriteVecStore]>,
                      Requires<[HasAVX512, In64BitMode]>;

// vmovq (0xD6): store element 0 of v2i64 to memory.
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
                      (ins i64mem:$dst, VR128X:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
                              addr:$dst)]>,
                      EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
                      Sched<[WriteVecStore]>, Requires<[HasAVX512]>;

// Register form of the 0xD6 encoding, disassembly only.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src),
                             "vmovq\t{$src, $dst|$dst, $src}", []>,
                             EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt

// "vmovq.s" selects the 0xD6 (store-opcode) register encoding.
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
                (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3905
// Move Scalar Single to Double Int
//
// CodeGen-only FR32X -> GR32 / memory bitcast moves under the vmovd
// mnemonic (store direction of VMOVDI2SSZ above).
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
                      (ins FR32X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(set GR32:$dst, (bitconvert FR32X:$src))]>,
                      EVEX, Sched<[WriteVecMoveToGpr]>;
def VMOVSS2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
                      (ins i32mem:$dst, FR32X:$src),
                      "vmovd\t{$src, $dst|$dst, $src}",
                      [(store (i32 (bitconvert FR32X:$src)), addr:$dst)]>,
                      EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3920
// Move Quadword Int to Packed Quadword Int
//
// vmovq (XS 0x7E): i64 load -> low element of an XMM register.
let ExeDomain = SSEPackedInt in {
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
                      (ins i64mem:$src),
                      "vmovq\t{$src, $dst|$dst, $src}",
                      [(set VR128X:$dst,
                        (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
                      EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt

// Allow "vmovd" but print "vmovq".
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3937
3938//===----------------------------------------------------------------------===//
3939// AVX-512  MOVSS, MOVSD
3940//===----------------------------------------------------------------------===//
3941
// Scalar move (VMOVSS/VMOVSD) for one element type:
//   rr/rrk/rrkz - register forms matching OpNode (X86Movss/X86Movsd) with
//     optional X86selects masking; rr is restricted to OptForSize.
//   rm/rmk/rmkz - scalar load; only the unmasked form has a pattern.
//   mr/mrk      - scalar store; only the unmasked form has a pattern.
multiclass avx512_move_scalar<string asm, SDNode OpNode,
                              X86VectorVTInfo _> {
  // Register-to-register form only when optimizing for size (otherwise
  // other lowerings are preferred).
  let Predicates = [HasAVX512, OptForSize] in
  def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
             _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
  // Zero-masked register form.
  def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                      _.ImmAllZerosV)))],
              _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
  // Merge-masked register form; $src0 is the pass-through, tied to $dst.
  let Constraints = "$src0 = $dst"  in
  def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
             "$dst {${mask}}, $src1, $src2}"),
             [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                     (_.VT _.RC:$src0))))],
             _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
  // Plain scalar load into the scalar FP register class; rematerializable.
  let canFoldAsLoad = 1, isReMaterializable = 1 in
  def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
             _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
  // Masked load forms have no patterns; selected manually during lowering.
  let mayLoad = 1, hasSideEffects = 0 in {
    let Constraints = "$src0 = $dst" in
    def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}}|",
               "$dst {${mask}}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
    def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
               "$dst {${mask}} {z}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
  }
  // Plain scalar store.
  def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
             !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
             [(store _.FRC:$src, addr:$dst)],  _.ExeDomain>,
             EVEX, Sched<[WriteFStore]>;
  // Masked scalar store; no pattern, and not a memory-folding candidate.
  let mayStore = 1, hasSideEffects = 0 in
  def mrk: AVX512PI<0x11, MRMDestMem, (outs),
              (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
              !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
              [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
              NotMemoryFoldable;
}
3996
// Instantiate the scalar-move multiclass for f32 (VMOVSS, XS prefix) and
// f64 (VMOVSD, XD prefix + VEX.W) with the appropriate EVEX disp8 scaling.
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
                                  VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;

defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
                                  VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
4002
4003
// Selection patterns mapping a masked scalar select (X86selects on FRC
// values, wrapped in scalar_to_vector and the scalar-move node) onto the
// merge-masked (rrk) and zero-masked (rrkz) reg-reg instructions defined
// above. ZeroFP is the type-specific +0.0 PatLeaf (fp32imm0/fp64imm0).
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
                                       PatLeaf ZeroFP, X86VectorVTInfo _> {

// Merge-masked: select between $src1 and $src2 — note the operand order on
// the instruction side ($src2 becomes the tied pass-through operand).
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT _.FRC:$src2))))))),
          (!cast<Instruction>(InstrStr#rrk)
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;

// Zero-masked: the false operand is +0.0, so use the {z} form.
def : Pat<(_.VT (OpNode _.RC:$src0,
                        (_.VT (scalar_to_vector
                                  (_.EltVT (X86selects VK1WM:$mask,
                                                       (_.EltVT _.FRC:$src1),
                                                       (_.EltVT ZeroFP))))))),
          (!cast<Instruction>(InstrStr#rrkz)
                        VK1WM:$mask,
                        (_.VT _.RC:$src0),
                        (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
}
4028
// Lower a 512-bit masked_store whose payload is a 128-bit vector widened via
// insert_subvector (only lane 0 of the mask can be set — Mask encodes that)
// onto the masked scalar store (mrk). The mask register is copied straight
// into VK1WM; MaskRC is the GPR class the mask dag computes in.
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                        dag Mask, RegisterClass MaskRC> {

def : Pat<(masked_store addr:$dst, Mask,
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
4041
// Same as avx512_store_scalar_lowering, but the mask GPR is narrower than
// i32, so it is first widened with INSERT_SUBREG into an i32 before being
// copied to the VK1WM mask class.
multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask, RegisterClass MaskRC,
                                               SubRegIndex subreg> {

def : Pat<(masked_store addr:$dst, Mask,
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

}
4056
// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked store directly. Codegen will widen 128-bit masked store to 512
// bits on AVX512F only targets. Mask512/Mask128 are the mask dags as they
// appear in the widened (AVX512F-only) and native 128-bit (AVX512VL) forms.
multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
                                               AVX512VLVectorVTInfo _,
                                               dag Mask512, dag Mask128,
                                               RegisterClass MaskRC,
                                               SubRegIndex subreg> {

// AVX512F pattern.
def : Pat<(masked_store addr:$dst, Mask512,
             (_.info512.VT (insert_subvector undef,
                               (_.info128.VT _.info128.RC:$src),
                               (iPTR 0)))),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;

// AVX512VL pattern.
def : Pat<(masked_store addr:$dst, Mask128, (_.info128.VT _.info128.RC:$src)),
          (!cast<Instruction>(InstrStr#mrk) addr:$dst,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
}
4081
// Lower 512-bit masked_loads whose result is immediately narrowed back to
// 128 bits onto the masked scalar load forms: zero pass-through -> rmkz,
// vzmovl of an existing register as pass-through -> rmk (merge).
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
                                       dag Mask, RegisterClass MaskRC> {

// Pass-through is all-zeros: use the zero-masking load.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

// Pass-through is the zero-extended low element of $src: merge-masking load.
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
                      addr:$srcAddr)>;

}
4105
// Same as avx512_load_scalar_lowering, but widens a sub-i32 mask GPR with
// INSERT_SUBREG before copying it into VK1WM.
multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask, RegisterClass MaskRC,
                                              SubRegIndex subreg> {

// Zero pass-through -> zero-masking load.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// Register pass-through (vzmovl) -> merge-masking load.
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

}
4131
// This matches the more recent codegen from clang that avoids emitting a 512
// bit masked load directly. Codegen will widen 128-bit masked load to 512
// bits on AVX512F only targets. Mask512/Mask128 are the mask dags for the
// widened (AVX512F-only) and native 128-bit (AVX512VL) forms respectively.
multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
                                              AVX512VLVectorVTInfo _,
                                              dag Mask512, dag Mask128,
                                              RegisterClass MaskRC,
                                              SubRegIndex subreg> {
// AVX512F patterns.
// Zero pass-through -> zero-masking load.
def : Pat<(_.info128.VT (extract_subvector
                         (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                                        (_.info512.VT (bitconvert
                                                       (v16i32 immAllZerosV))))),
                           (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// Register pass-through (vzmovl) -> merge-masking load.
def : Pat<(_.info128.VT (extract_subvector
                (_.info512.VT (masked_load addr:$srcAddr, Mask512,
                      (_.info512.VT (insert_subvector undef,
                            (_.info128.VT (X86vzmovl _.info128.RC:$src)),
                            (iPTR 0))))),
                (iPTR 0))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

// AVX512Vl patterns.
// Same two cases on the unwidened 128-bit masked_load.
def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (bitconvert (v4i32 immAllZerosV))))),
          (!cast<Instruction>(InstrStr#rmkz)
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;

def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
                         (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
          (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
                      (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                      addr:$srcAddr)>;
}
4173
// Instantiate the masked scalar select lowering for VMOVSS/VMOVSD.
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;

// Masked scalar stores: each Mask dag below spells out how a single mask bit
// (bit 0 of a GPR) appears in the DAG for the respective mask register width.
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

// Widened-mask forms (clang's 128-bit masked store widened to 512 bits on
// AVX512F-only targets) plus the native AVX512VL 128-bit mask shape.
defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;

// Masked scalar loads: mask dags mirror the store instantiations above.
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;

defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
                   (v16i1 (insert_subvector
                           (v16i1 immAllZerosV),
                           (v4i1 (extract_subvector
                                  (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                                  (iPTR 0))),
                           (iPTR 0))),
                   (v4i1 (extract_subvector
                          (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
                          (iPTR 0))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
                   (v8i1
                    (extract_subvector
                     (v16i1
                      (insert_subvector
                       (v16i1 immAllZerosV),
                       (v2i1 (extract_subvector
                              (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                              (iPTR 0))),
                       (iPTR 0))),
                     (iPTR 0))),
                   (v2i1 (extract_subvector
                          (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
                          (iPTR 0))), GR8, sub_8bit>;
4240
// Masked scalar select on bare FR32X/FR64X values: bounce through VR128X and
// the masked vmovss/vmovsd reg-reg forms. IMPLICIT_DEF stands in for the
// (unused) first move operand.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
           (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
           VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

// False operand is +0.0 -> zero-masking form.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
          (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
           (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;

def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
           (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
           VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4256
// Zero-masked f64 select: false operand is +0.0, so use the {z} form.
// Use the type-specific fp64imm0 PatLeaf for consistency with the f32
// pattern above (fp32imm0) and the avx512_move_scalar_lowering
// instantiation for VMOVSDZ; in this f64-typed context it matches the
// same nodes as the generic fpimm0.
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
          (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
           (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4260
// Reversed-operand (MRMDestReg, opcode 0x11) encodings of the reg-reg moves.
// These carry no patterns — they exist so the disassembler can decode the
// store-form encoding and so FoldGenData can map each to its canonical
// 0x10 counterpart; isCodeGenOnly keeps them out of normal selection.
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
  def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XS, EVEX_4V, VEX_LIG,
                           FoldGenData<"VMOVSSZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovss\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                         (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                         "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
                                    "$dst {${mask}} {z}, $src1, $src2}",
                         []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                         FoldGenData<"VMOVSSZrrkz">,
                         Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                           (ins VR128X:$src1, VR128X:$src2),
                           "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           []>, XD, EVEX_4V, VEX_LIG, VEX_W,
                           FoldGenData<"VMOVSDZrr">,
                           Sched<[SchedWriteFShuffle.XMM]>;

  let Constraints = "$src0 = $dst" in
  def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
                                                   VR128X:$src1, VR128X:$src2),
                             "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
                                        "$dst {${mask}}, $src1, $src2}",
                             []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
                             VEX_W, FoldGenData<"VMOVSDZrrk">,
                             Sched<[SchedWriteFShuffle.XMM]>;

  def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.KRCWM:$mask, VR128X:$src1,
                                                          VR128X:$src2),
                              "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
                                         "$dst {${mask}} {z}, $src1, $src2}",
                              []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
                              VEX_W, FoldGenData<"VMOVSDZrrkz">,
                              Sched<[SchedWriteFShuffle.XMM]>;
}
4313
// ".s"-suffixed assembler aliases that force the reversed (store-form)
// encodings above; priority 0 so they are never used when printing.
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                             "$dst {${mask}}, $src1, $src2}",
                (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
                                VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                             "$dst {${mask}} {z}, $src1, $src2}",
                (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
                                 VR128X:$src1, VR128X:$src2), 0>;
4334
// When optimizing for size, select X86vzmovl ("move low element, zero the
// rest") as VMOVSS/VMOVSD against a zeroed register. For 256/512-bit types
// the operation is done on the low xmm and SUBREG_TO_REG re-widens the
// result, relying on the instruction zeroing the upper lanes.
let Predicates = [HasAVX512, OptForSize] in {
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;

  // Move low f64/i64 and clear high bits (256-bit).
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
              (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
              (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))), sub_xmm)>;

  // 512-bit variants of the same patterns.
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
              (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
              (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))), sub_xmm)>;

  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
              (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))), sub_xmm)>;

}
4380
// Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
// VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31. Blend
// immediates select the low lanes from $src: 1 = one f32/f64 lane, 3/0xf =
// the corresponding two/four 16-bit words for the integer VPBLENDW forms.
let Predicates = [HasAVX512, OptForSpeed] in {
  def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
                          (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
                          (i8 1))), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                          (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                          (i8 3))), sub_xmm)>;

  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2f64 (VBLENDPDrri (v2f64 (V_SET0)),
                          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)),
                          (i8 1))), sub_xmm)>;
  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
            (SUBREG_TO_REG (i32 0),
             (v2i64 (VPBLENDWrri (v2i64 (V_SET0)),
                          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)),
                          (i8 0xf))), sub_xmm)>;
}
4406
let Predicates = [HasAVX512] in {

  // VMOVSSZrm zeros the high parts of the destination register, so a
  // zero-extending scalar load (vzmovl-of-load / vzload) folds to a plain
  // scalar load; COPY_TO_REGCLASS moves the FR32X result into VR128X.
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
  def : Pat<(v4f32 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;

  // Same for VMOVSDZrm with f64.
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types; SUBREG_TO_REG re-widens the zero-extended xmm result.
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v4f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  // Represent the same patterns above but in the form they appear for
  // 512-bit types
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v16f32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
  def : Pat<(v8f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;

  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Extract and store the low f32 element with a scalar store.
  def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
}
4470
// EVEX vmovq xmm,xmm: moves the low 64-bit element and zeros the upper bits
// (pattern: X86vzmovl on v2i64).
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                (ins VR128X:$src),
                                "vmovq\t{$src, $dst|$dst, $src}",
                                [(set VR128X:$dst, (v2i64 (X86vzmovl
                                                   (v2i64 VR128X:$src))))]>,
                                EVEX, VEX_W;
}
4479
// Zero-extending GPR->XMM moves and zero-extending integer loads: select
// X86vzmovl/X86vzload onto the EVEX movd/movq forms; wider results are
// rebuilt with SUBREG_TO_REG since the 128-bit moves zero the upper lanes.
let Predicates = [HasAVX512] in {
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIZrr GR32:$src)>;

  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
            (VMOV64toPQIZrr GR64:$src)>;

  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                               (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;

  def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
                               (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;

  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v4i32 (X86vzload addr:$src)),
            (VMOVDI2PDIZrm addr:$src)>;
  def : Pat<(v8i32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
            (VMOVZPQILo2PQIZrr VR128X:$src)>;
  def : Pat<(v2i64 (X86vzload addr:$src)),
            (VMOVQI2PQIZrm addr:$src)>;
  def : Pat<(v4i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                               (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;
  def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
                                (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;

  // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
  def : Pat<(v16i32 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
  def : Pat<(v8i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
}
4529
4530//===----------------------------------------------------------------------===//
4531// AVX-512 - Non-temporals
4532//===----------------------------------------------------------------------===//
4533
// Non-temporal aligned loads (vmovntdqa). Pattern-less ([]): selection is
// presumably handled elsewhere — TODO confirm. 512-bit form requires only
// AVX512F; 256/128-bit forms additionally require VLX.
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                      (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
                      EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;

let Predicates = [HasVLX] in {
  def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                       (ins i256mem:$src),
                       "vmovntdqa\t{$src, $dst|$dst, $src}",
                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
                       EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;

  def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                      (ins i128mem:$src),
                      "vmovntdqa\t{$src, $dst|$dst, $src}",
                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
                      EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
}
4552
// Non-temporal store (register -> memory) for one vector width.
// st_frag defaults to alignednontemporalstore; the high AddedComplexity (400)
// makes these patterns win over ordinary aligned-store patterns.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                        X86SchedWriteMoveLS Sched,
                        PatFrag st_frag = alignednontemporalstore> {
  let SchedRW = [Sched.MR], AddedComplexity = 400 in
  def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(st_frag (_.VT _.RC:$src), addr:$dst)],
                    _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
}
4562
// Instantiate avx512_movnt at all three vector lengths: ZMM under HasAVX512,
// YMM/XMM additionally gated on HasVLX.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
                           AVX512VLVectorVTInfo VTInfo,
                           X86SchedWriteMoveLSWidths Sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
    defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
  }
}
4574
// Non-temporal stores: integer (VMOVNTDQ, typed on i64 vectors) and FP
// (VMOVNTPD/VMOVNTPS). Prefix/W bits select the correct encodings.
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
                                SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
                                SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
                                SchedWriteFMoveLSNT>, PS;
4581
// 512-bit non-temporal patterns not covered by the instruction defs:
// stores of the other integer element types reuse VMOVNTDQZmr, and aligned
// NT loads of any 512-bit type are matched to VMOVNTDQAZrm (which has no
// pattern of its own).
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}
4597
// Same supplemental non-temporal patterns for the 256-bit (Z256) and 128-bit
// (Z128) forms, available only with AVX512VL.
let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}
4627
4628//===----------------------------------------------------------------------===//
4629// AVX-512 - Integer arithmetic
4630//
// Masked integer binop, register-register (rr) and register-memory (rm)
// forms, for a single vector width described by _.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  // reg-reg form.
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                    IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[sched]>;

  // reg-mem form: the loaded value is bitconverted to the operand VT.
  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                                (bitconvert (_.LdFrag addr:$src2))))>,
                  AVX512BIBase, EVEX_4V,
                  Sched<[sched.Folded, ReadAfterLd]>;
}
4649
// Extends avx512_binop_rm with the EVEX.b broadcast-from-memory form (rmb),
// matching OpNode against a splat of a scalar load.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (_.VT (OpNode _.RC:$src1,
                                (X86VBroadcast
                                    (_.ScalarLdFrag addr:$src2))))>,
                  AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[sched.Folded, ReadAfterLd]>;
}
4664
// Instantiate avx512_binop_rm at 512/256/128 bits; the narrow widths
// additionally require VLX on top of the caller-supplied predicate.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}
4680
// Same as avx512_binop_rm_vl but via avx512_binop_rmb, i.e. including the
// broadcast-memory form (used for 32/64-bit element binops).
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}
4696
// i64-element binop: broadcast form available, VEX.W set, CD8 scale 64.
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}
4704
// i32-element binop: broadcast form available, CD8 scale 32.
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}
4711
// i16-element binop: no broadcast form (sub-dword elements can't broadcast),
// W-bit ignored (VEX_WIG).
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}
4719
// i8-element binop: no broadcast form, W-bit ignored (VEX_WIG).
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}
4727
// Pairs a "d" (i32) and "q" (i64) variant under one NAME, appending the
// element-size letter to the mnemonic.
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                   IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                   IsCommutable>;
}
4737
// Pairs a "b" (i8) and "w" (i16) variant under one NAME, appending the
// element-size letter to the mnemonic.
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                   IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                   IsCommutable>;
}
4747
// All four element sizes. Note the predicate split: d/q forms need only
// HasAVX512, while b/w forms require BWI.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}
4758
// Binop whose source and destination vector types differ (_Src vs _Dst),
// with a broadcast form using a third type (_Brdct) for the splatted
// memory operand.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  // reg-reg form.
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  // reg-mem form.
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (bitconvert (_Src.LdFrag addr:$src2))))>,
                        AVX512BIBase, EVEX_4V,
                        Sched<[sched.Folded, ReadAfterLd]>;

  // broadcast form: splat a _Brdct-element scalar, bitconvert to _Src's VT.
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"##_Brdct.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_Brdct.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Brdct.VT (X86VBroadcast
                                          (_Brdct.ScalarLdFrag addr:$src2))))))>,
                    AVX512BIBase, EVEX_4V, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
}
4791
// Integer add/sub for all element sizes (b/w need BWI, d/q need AVX512).
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
// Saturating add/sub (signed and unsigned), byte/word only.
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
                                     SchedWriteVecALU, HasBWI, 0>;
// Multiplies: low halves (VPMULL*), high halves (VPMULH*), rounded-scaled
// (VPMULHRSW), and widening 32x32->64 (VPMULDQ/VPMULUDQ).
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
// Byte/word averaging.
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;
4823
4824multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4825                            X86SchedWriteWidths sched,
4826                            AVX512VLVectorVTInfo _SrcVTInfo,
4827                            AVX512VLVectorVTInfo _DstVTInfo,
4828                            SDNode OpNode, Predicate prd,  bit IsCommutable = 0> {
4829  let Predicates = [prd] in
4830    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4831                                 _SrcVTInfo.info512, _DstVTInfo.info512,
4832                                 v8i64_info, IsCommutable>,
4833                                  EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4834  let Predicates = [HasVLX, prd] in {
4835    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4836                                      _SrcVTInfo.info256, _DstVTInfo.info256,
4837                                      v4i64x_info, IsCommutable>,
4838                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4839    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4840                                      _SrcVTInfo.info128, _DstVTInfo.info128,
4841                                      v2i64x_info, IsCommutable>,
4842                                     EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
4843  }
4844}
4845
// VPMULTISHIFTQB (VBMI): byte-granular source/dest with qword broadcast.
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                avx512vl_i8_info, avx512vl_i8_info,
                                X86multishift, HasVBMI, 0>, T8PD;
4849
// Broadcast-memory form for pack instructions: splat a _Src-element scalar
// and narrow into _Dst.
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                    (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                    OpcodeStr,
                    "${src2}"##_Src.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_Src.BroadcastStr,
                    (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                 (_Src.VT (X86VBroadcast
                                          (_Src.ScalarLdFrag addr:$src2))))))>,
                    EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                    Sched<[sched.Folded, ReadAfterLd]>;
}
4864
// Register and memory forms for narrowing pack-style ops (_Src -> _Dst).
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                        (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                        "$src2, $src1", "$src1, $src2",
                        (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                      (bitconvert (_Src.LdFrag addr:$src2))))>,
                         EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                         Sched<[sched.Folded, ReadAfterLd]>;
}
4885
// i32 -> i16 packs at all widths (BWI; VLX for YMM/XMM); dword elements
// allow a broadcast form, so rmb is included.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}
// i16 -> i8 packs at all widths; word elements have no broadcast form,
// so only rr/rm are instantiated.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                            SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, VEX_WIG;
  }
}
4920
// VPMADD-style widening multiply-add (_Src elements pair-wise combined into
// wider _Dst elements) at all widths; BWI required, VLX for YMM/XMM.
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                            SDNode OpNode, AVX512VLVectorVTInfo _Src,
                            AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}
4937
// Pack with signed/unsigned saturation. VPACKUSDW uses the 0F38 opcode map
// (AVX5128IBase); the others are in the 0F map (AVX512BIBase).
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4942
// Widening multiply-adds: i8 pairs -> i16 (VPMADDUBSW, not commutable since
// operands are unsigned x signed) and i16 pairs -> i32 (VPMADDWD, commutable).
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                     avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD   : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                     avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
4947
// Integer min/max, signed and unsigned, all element sizes. b/w forms need
// BWI; d/q need AVX512. The q forms are DQI-era encodings with no VEX
// equivalent, hence NotEVEX2VEXConvertible.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
4987
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// With HasDQI but no VLX, only the ZMM form (VPMULLQZrr) exists, so the
// narrow operands are widened into a ZMM register with INSERT_SUBREG,
// multiplied, and the original-width subregister extracted back out.
// (This block previously appeared twice verbatim; the redundant copy, which
// only produced duplicate anonymous selection patterns, has been removed.)
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
5021
// Lower a 256/128-bit i64 min/max OpNode via the given 512-bit instruction:
// widen operands into ZMM, run Instr, extract the narrow subregister.
// Used below when the VLX-width instructions are unavailable.
multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
5037
// Without VLX, i64 min/max at 128/256 bits is implemented through the
// corresponding 512-bit instruction.
let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
  defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
  defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
  defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
}
5044
5045//===----------------------------------------------------------------------===//
5046// AVX-512  Logical Instructions
5047//===----------------------------------------------------------------------===//
5048
5049// OpNodeMsk is the OpNode to use when element size is important. OpNode will
5050// be set to null_frag for 32-bit elements.
// Bitwise logic op, rr and rm forms. The unmasked pattern uses OpNode
// (null_frag for 32-bit element forms, per the comment above), while the
// masked pattern always uses OpNodeMsk on the i64 VT with bitconverts, since
// element size only matters for masking.
multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
                           SDPatternOperator OpNode,
                           SDNode OpNodeMsk, X86FoldableSchedWrite sched,
                           X86VectorVTInfo _, bit IsCommutable = 0> {
  let hasSideEffects = 0 in
  defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                    "$src2, $src1", "$src1, $src2",
                    (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                     (bitconvert (_.VT _.RC:$src2)))),
                    (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                                          _.RC:$src2)))),
                    IsCommutable>, AVX512BIBase, EVEX_4V,
                    Sched<[sched]>;

  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
                                   (bitconvert (_.LdFrag addr:$src2)))),
                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                     (bitconvert (_.LdFrag addr:$src2))))))>,
                  AVX512BIBase, EVEX_4V,
                  Sched<[sched.Folded, ReadAfterLd]>;
}
5077
5078// OpNodeMsk is the OpNode to use where element size is important. So use
5079// for all of the broadcast patterns.
// Adds the broadcast form to avx512_logic_rm. Both the unmasked and masked
// broadcast patterns use OpNodeMsk, because the broadcast element size is
// always significant (see comment above).
multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
                            SDPatternOperator OpNode,
                            SDNode OpNodeMsk, X86FoldableSchedWrite sched, X86VectorVTInfo _,
                            bit IsCommutable = 0> :
           avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, sched, _,
                           IsCommutable> {
  defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                  "${src2}"##_.BroadcastStr##", $src1",
                  "$src1, ${src2}"##_.BroadcastStr,
                  (_.i64VT (OpNodeMsk _.RC:$src1,
                                   (bitconvert
                                    (_.VT (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2)))))),
                  (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
                                     (bitconvert
                                      (_.VT (X86VBroadcast
                                             (_.ScalarLdFrag addr:$src2))))))))>,
                  AVX512BIBase, EVEX_4V, EVEX_B,
                  Sched<[sched.Folded, ReadAfterLd]>;
}
5101
// Instantiate avx512_logic_rmb at all three widths (VLX for YMM/XMM).
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
                               SDPatternOperator OpNode,
                               SDNode OpNodeMsk, X86SchedWriteWidths sched,
                               AVX512VLVectorVTInfo VTInfo,
                               bit IsCommutable = 0> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.ZMM,
                              VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.YMM,
                                 VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.XMM,
                                 VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}
5118
// Instantiate both the dword (D) and qword (Q) flavors of a logic op.
// NOTE(review): the D flavor passes null_frag as the plain OpNode, which
// appears to disable the non-broadcast i32 patterns — presumably because
// integer logic is canonicalized to the i64 type and matched by the Q form;
// the broadcast patterns still use OpNode (as OpNodeMsk) since element size
// matters there. Confirm against avx512_logic_rm.
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 bit IsCommutable = 0> {
  defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, sched,
                               avx512vl_i64_info, IsCommutable>,
                               VEX_W, EVEX_CD8<64, CD8VF>;
  defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, sched,
                               avx512vl_i32_info, IsCommutable>,
                               EVEX_CD8<32, CD8VF>;
}
5129
// EVEX-encoded bitwise logic: VPAND[D/Q], VPOR[D/Q], VPXOR[D/Q] are
// commutable; VPANDN[D/Q] (x86 and-not) is not.
defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, 1>;
defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, 1>;
defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, 1>;
defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic>;
5138
5139//===----------------------------------------------------------------------===//
5140// AVX-512  FP arithmetic
5141//===----------------------------------------------------------------------===//
5142
// Scalar FP arithmetic op. Emits the intrinsic (_Int, vector-in-xmm) forms
// with masking support, plus isCodeGenOnly forms on the scalar FR register
// class that match the plain fadd/fsub/... SDNodes.
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                            SDNode OpNode, SDNode VecNode,
                            X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  // Intrinsic reg/reg form; FROUND_CURRENT = use MXCSR rounding mode.
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2,
                                          (i32 FROUND_CURRENT)))>,
                           Sched<[sched]>;

  // Intrinsic reg/mem form.
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        _.ScalarIntMemCPat:$src2,
                                        (i32 FROUND_CURRENT)))>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  // CodeGen-only forms on FR32X/FR64X used for plain scalar IR operations;
  // never printed by the assembler (the _Int forms own the mnemonic).
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }
  }
}
5178
// Scalar FP op with an explicit embedded rounding-control operand $rc
// (EVEX.b set, EVEX_RC). Register-register only; embedded rounding is not
// encodable with a memory operand.
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                  SDNode VecNode, X86FoldableSchedWrite sched,
                                  bit IsCommutable = 0> {
  let ExeDomain = _.ExeDomain in
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                          "$rc, $src2, $src1", "$src1, $src2, $rc",
                          (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                          (i32 imm:$rc)), IsCommutable>,
                          EVEX_B, EVEX_RC, Sched<[sched]>;
}
// Scalar FP op variant for operations that support {sae}
// (suppress-all-exceptions) rather than embedded rounding, e.g. min/max.
// VecNode matches the intrinsic forms, SaeNode the {sae} form, and OpNode
// the plain scalar (isCodeGenOnly) forms.
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                                SDNode OpNode, SDNode VecNode, SDNode SaeNode,
                                X86FoldableSchedWrite sched, bit IsCommutable> {
  let ExeDomain = _.ExeDomain in {
  // Intrinsic reg/reg form (no rounding operand — these ops don't round).
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
                           Sched<[sched]>;

  // Intrinsic reg/mem form.
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (VecNode _.RC:$src1,
                                        _.ScalarIntMemCPat:$src2))>,
                         Sched<[sched.Folded, ReadAfterLd]>;

  // CodeGen-only forms on the scalar FR register class for plain IR nodes.
  let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = IsCommutable;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }

  // {sae} form: EVEX.b suppresses FP exceptions (FROUND_NO_EXC).
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            (i32 FROUND_NO_EXC))>, EVEX_B,
                            Sched<[sched]>;
  }
}
5231
// Instantiate an SS (f32) and SD (f64) scalar op with embedded-rounding
// support (add/sub/mul/div).
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                SDNode VecNode, X86SchedWriteSizes sched,
                                bit IsCommutable> {
  defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
                              sched.PS.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
                              sched.PS.Scl, IsCommutable>,
                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
                              sched.PD.Scl, IsCommutable>,
             avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
                              sched.PD.Scl, IsCommutable>,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
5246
// Instantiate an SS (f32) and SD (f64) scalar op with {sae} support
// (min/max).
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode VecNode, SDNode SaeNode,
                              X86SchedWriteSizes sched, bit IsCommutable> {
  defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
                              VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
                              XS, EVEX_4V, VEX_LIG,  EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
                              VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
                              XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
// Scalar FP arithmetic. ADD/MUL are commutable; SUB/DIV are not.
// MIN/MAX are not marked commutable: they are not commutable in the
// presence of NaNs or signed zeros.
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
                                 SchedWriteFAddSizes, 1>;
defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
                                 SchedWriteFMulSizes, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
                                 SchedWriteFAddSizes, 0>;
defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
                                 SchedWriteFDivSizes, 0>;
defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
                               SchedWriteFCmpSizes, 0>;
defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
                               SchedWriteFCmpSizes, 0>;
5269
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax.
// These are isCodeGenOnly variants on the scalar FR register class with
// isCommutable unconditionally set.
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
                                    X86VectorVTInfo _, SDNode OpNode,
                                    X86FoldableSchedWrite sched> {
  let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.FRC:$src2),
                          OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
                          Sched<[sched]> {
    let isCommutable = 1;
  }
  def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
                         (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                         OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                         [(set _.FRC:$dst, (OpNode _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src2)))]>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }
}
// Commutable (unsafe-fp-math) scalar min/max: same opcodes as VMIN/VMAX
// but match the X86fminc/X86fmaxc nodes.
defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
                                         SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<64, CD8VT1>;

defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, XS, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<32, CD8VT1>;

defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
                                         SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
                                         VEX_LIG, EVEX_CD8<64, CD8VT1>;
5306
// Packed FP arithmetic op: reg/reg, reg/mem, and reg/broadcast-mem forms
// with full masking support. OpNode may be null_frag to emit the
// instruction without any plain selection pattern.
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable,
                            bit IsKZCommutable = IsCommutable> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable, 0,
                  IsKZCommutable>,
                  EVEX_4V, Sched<[sched]>;
  // mayLoad must be set explicitly because hasSideEffects = 0 above would
  // otherwise let the pattern-less (null_frag) forms be treated as pure.
  let mayLoad = 1 in {
    defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
                    EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
    // Broadcast form (EVEX_B): scalar memory operand splatted to a vector.
    defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                     "${src2}"##_.BroadcastStr##", $src1",
                     "$src1, ${src2}"##_.BroadcastStr,
                     (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
                                                (_.ScalarLdFrag addr:$src2))))>,
                     EVEX_4V, EVEX_B,
                     Sched<[sched.Folded, ReadAfterLd]>;
    }
  }
}
5335
// Packed FP op with an explicit embedded rounding-control operand $rc
// (EVEX.b + EVEX_RC; register operands only).
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNodeRnd,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
                  "$rc, $src2, $src1", "$src1, $src2, $rc",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
                  EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
5346
// Packed FP op with {sae} (suppress-all-exceptions, FROUND_NO_EXC);
// EVEX.b set, register operands only.
multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
                                SDPatternOperator OpNodeRnd,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                  (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>,
                  EVEX_4V, EVEX_B, Sched<[sched]>;
}
5357
// Instantiate a packed FP op for PS/PD at 512-bit (predicate prd) and,
// when VLX is available, at 128/256-bit. IsPD128Commutable lets the
// 128-bit PD form override commutability independently (it is forwarded
// as the IsCommutable argument of the PDZ128 instantiation, with the
// outer IsCommutable becoming its IsKZCommutable).
multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
                             Predicate prd, X86SchedWriteSizes sched,
                             bit IsCommutable = 0,
                             bit IsPD128Commutable = IsCommutable> {
  let Predicates = [prd] in {
  defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
                              sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
                              EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
                              sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
                              EVEX_CD8<64, CD8VF>;
  }

    // Define only if AVX512VL feature is present.
  let Predicates = [prd, HasVLX] in {
    defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
                                   sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
                                   sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
                                   EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
                                   sched.PD.XMM, IsPD128Commutable,
                                   IsCommutable>, EVEX_V128, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
                                   sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
                                   EVEX_CD8<64, CD8VF>;
  }
}
5388
// Embedded-rounding forms exist only at 512 bits (no VLX variants).
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                   X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                    v16f32_info>,
                                    EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                    v8f64_info>,
                                    EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
5398
// {sae} forms exist only at 512 bits (no VLX variants).
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                 X86SchedWriteSizes sched> {
  defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
                                  v16f32_info>,
                                  EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
                                  v8f64_info>,
                                  EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
5408
// Packed FP arithmetic with embedded-rounding (add/mul/sub/div) or {sae}
// (min/max) 512-bit variants.
defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
                              SchedWriteFAddSizes, 1>,
            avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
                              SchedWriteFMulSizes, 1>,
            avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
                              SchedWriteFAddSizes>,
            avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
                              SchedWriteFDivSizes>,
            avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
                              SchedWriteFCmpSizes, 0>,
            avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmpSizes>;
// Commutable (unsafe-fp-math) min/max variants, selection patterns only.
let isCodeGenOnly = 1 in {
  defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
  defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
                                 SchedWriteFCmpSizes, 1>;
}
// FP bitwise logic (VANDPS/PD etc.) requires DQI. null_frag: no plain
// patterns here; selection is handled by the lowering patterns below.
defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 0>;
defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
                               SchedWriteFLogicSizes, 1>;
5441
// Patterns catch floating point selects with bitcasted integer logic ops.
// These map masked vselects over integer logic (and/or/xor/andnp seen
// through bitconverts) onto the integer VP{AND,OR,XOR,ANDN} instructions.
multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
                                      X86VectorVTInfo _, Predicate prd> {
let Predicates = [prd] in {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;
  // Zero-masked variant (false lanes become zero).
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;
  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1,
                                         (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
  // Register-broadcast logical operations.
  def : Pat<(_.i64VT (OpNode _.RC:$src1,
                      (bitconvert (_.VT (X86VBroadcast
                                         (_.ScalarLdFrag addr:$src2)))))),
            (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
  // Masked register-broadcast logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (_.i64VT (OpNode _.RC:$src1,
                              (bitconvert (_.VT
                                           (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2))))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (_.i64VT (OpNode _.RC:$src1,
                              (bitconvert (_.VT
                                           (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2))))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz)  _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}
}
5494
// Apply the FP-select logic lowering for every FP vector width; 128/256-bit
// forms need VLX, 512-bit forms need only AVX512F.
multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
  defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
  defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
  defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
}
5503
// Instantiate the FP-select lowering patterns for each integer logic node.
defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
5508
let Predicates = [HasVLX,HasDQI] in {
  // Use packed logical operations for scalar ops.
  // The scalar FR32X/FR64X value is moved into VR128X, operated on with the
  // 128-bit VANDPS/VORPS/... (DQI) instruction, and copied back. The upper
  // vector lanes are don't-care for the scalar result.
  def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
             (v2f64 (VANDPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
                                  (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
             FR64X)>;
  def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
             (v2f64 (VORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
                                 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
             FR64X)>;
  def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
             (v2f64 (VXORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
                                  (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
             FR64X)>;
  def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
            (COPY_TO_REGCLASS
             (v2f64 (VANDNPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
                                   (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
             FR64X)>;

  // Same for f32.
  def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
             (v4f32 (VANDPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
                                  (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
             FR32X)>;
  def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
             (v4f32 (VORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
                                 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
             FR32X)>;
  def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
             (v4f32 (VXORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
                                  (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
             FR32X)>;
  def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
            (COPY_TO_REGCLASS
             (v4f32 (VANDNPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
                                   (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
             FR32X)>;
}
5553
// Packed VSCALEF: reg/reg, reg/mem, and reg/broadcast forms; the OpNode
// takes an extra i32 rounding operand (FROUND_CURRENT = MXCSR mode here).
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
                  EVEX_4V, Sched<[sched]>;
  defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>,
                  EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  // Broadcast form (EVEX_B).
  defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (OpNode  _.RC:$src1, (_.VT (X86VBroadcast
                                              (_.ScalarLdFrag addr:$src2))),
                                              (i32 FROUND_CURRENT))>,
                   EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
5577
// Scalar VSCALEFSS/SD: intrinsic-style reg/reg and reg/mem forms only
// (no isCodeGenOnly FR forms — there is no generic IR node for scalef).
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
                  Sched<[sched]>;
  defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
                  "$src2, $src1", "$src1, $src2",
                  (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
                          (i32 FROUND_CURRENT))>,
                  Sched<[sched.Folded, ReadAfterLd]>;
  }
}
5594
// Full VSCALEF family: packed PS/PD at every width (plus embedded-rounding
// 512-bit forms) and scalar SS/SD (plus rounding forms). opc is the packed
// opcode, opcScaler the scalar one.
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
                                SDNode OpNode, SDNode OpNodeScal,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
                              EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
             avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
                              EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
  defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f32x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, sched.Scl>,
                           EVEX_4V,EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f64x_info>,
             avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, sched.Scl>,
                           EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v4f32x_info>,
                                   EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v8f32x_info>,
                                   EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v2f64x_info>,
                                   EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v4f64x_info>,
                                   EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
// VSCALEF{PS,PD,SS,SD}: dst = src1 * 2^floor(src2). EVEX-only encoding.
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs,
                                    SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5625
5626//===----------------------------------------------------------------------===//
5627// AVX-512  VPTESTM instructions
5628//===----------------------------------------------------------------------===//
5629
// VPTESTM/VPTESTNM: mask = cmp((src1 & src2), 0) per element; the compare
// kind (ne for TESTM, eq for TESTNM) is carried by the OpNode PatFrag.
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _,
                         string Name> {
  let ExeDomain = _.ExeDomain in {
  // The AND inside the pattern makes the operands interchangeable.
  let isCommutable = 1 in
  defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                      "$src2, $src1", "$src1, $src2",
                   (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
                           _.ImmAllZerosV)>,
                   EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                       "$src2, $src1", "$src1, $src2",
                   (OpNode (bitconvert
                            (_.i64VT (and _.RC:$src1,
                                          (bitconvert (_.LdFrag addr:$src2))))),
                           _.ImmAllZerosV)>,
                   EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, ReadAfterLd]>;
  }

  // Patterns for compare with 0 that just use the same source twice.
  // (x & x) == x, so testing a single operand maps to rr with src repeated.
  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
            (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rr")
                                      _.RC:$src, _.RC:$src))>;

  // Same, folded under an outer mask AND (goes to the masked rrk form).
  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
            (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rrk")
                                      _.KRC:$mask, _.RC:$src, _.RC:$src))>;
}
5661
// Broadcast-memory (rmb) companion to avx512_vptest: the second source is a
// single scalar element broadcast to the full vector (EVEX.b set), then ANDed
// with src1 and compared against zero into a mask register.
5662multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5663                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5664  let ExeDomain = _.ExeDomain in
5665  defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5666                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5667                    "${src2}"##_.BroadcastStr##", $src1",
5668                    "$src1, ${src2}"##_.BroadcastStr,
5669                    (OpNode (and _.RC:$src1,
5670                                       (X86VBroadcast
5671                                        (_.ScalarLdFrag addr:$src2))),
5672                            _.ImmAllZerosV)>,
5673                    EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5674                    Sched<[sched.Folded, ReadAfterLd]>;
5675}
5676
5677// Use 512bit version to implement 128/256 bit in case NoVLX.
// Lowers 128/256-bit vptest patterns onto the 512-bit instruction when VLX is
// unavailable.  The narrow operands are widened with INSERT_SUBREG into an
// ExtendInfo-sized register (upper lanes IMPLICIT_DEF), the 512-bit compare
// named Name#"Zrr"/"Zrrk" is issued, and the wide mask result is moved back
// to the narrow mask class with COPY_TO_REGCLASS.
5678multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
5679                                  X86VectorVTInfo _, string Name> {
5680  // Unmasked two-operand form: (src1 & src2) compared against zero.
5680  def : Pat<(_.KVT (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
5681                           _.ImmAllZerosV)),
5682            (_.KVT (COPY_TO_REGCLASS
5683                     (!cast<Instruction>(Name # "Zrr")
5684                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5685                                      _.RC:$src1, _.SubRegIdx),
5686                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5687                                      _.RC:$src2, _.SubRegIdx)),
5688                   _.KRC))>;
5689
5690  // Masked two-operand form: the narrow mask is first widened to the
5690  // ExtendInfo mask class so it can feed the 512-bit "Zrrk" instruction.
5690  def : Pat<(_.KVT (and _.KRC:$mask,
5691                        (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))),
5692                                _.ImmAllZerosV))),
5693            (COPY_TO_REGCLASS
5694             (!cast<Instruction>(Name # "Zrrk")
5695              (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
5696              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5697                             _.RC:$src1, _.SubRegIdx),
5698              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5699                             _.RC:$src2, _.SubRegIdx)),
5700             _.KRC)>;
5701
5702  // Unmasked compare-with-zero: the single source is used for both operands.
5702  def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
5703            (_.KVT (COPY_TO_REGCLASS
5704                     (!cast<Instruction>(Name # "Zrr")
5705                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5706                                      _.RC:$src, _.SubRegIdx),
5707                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5708                                      _.RC:$src, _.SubRegIdx)),
5709                   _.KRC))>;
5710
5711  // Masked compare-with-zero.
5711  def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
5712            (COPY_TO_REGCLASS
5713             (!cast<Instruction>(Name # "Zrrk")
5714              (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
5715              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5716                             _.RC:$src, _.SubRegIdx),
5717              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5718                             _.RC:$src, _.SubRegIdx)),
5719             _.KRC)>;
5720}
5721
// Instantiates one element size (d or q) of vptest across vector widths:
// the 512-bit form whenever AVX512 is present, the 128/256-bit EVEX forms
// only with VLX, and otherwise the *_Alt widening patterns that reuse the
// 512-bit instruction (see avx512_vptest_lowering).
5722multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5723                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5724  let Predicates  = [HasAVX512] in
5725  defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, NAME>,
5726           avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5727
5728  let Predicates = [HasAVX512, HasVLX] in {
5729  defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched.YMM, _.info256, NAME>,
5730              avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5731  defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched.XMM, _.info128, NAME>,
5732              avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
5733  }
5734  let Predicates = [HasAVX512, NoVLX] in {
5735  defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, NAME>;
5736  defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, NAME>;
5737  }
5738}
5739
// Dword and qword element sizes of vptest share one opcode; the "q" variant
// only differs by the REX.W/EVEX.W bit (VEX_W) and its VT info.
5740multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5741                            X86SchedWriteWidths sched> {
5742  defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, sched,
5743                                 avx512vl_i32_info>;
5744  defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, sched,
5745                                 avx512vl_i64_info>, VEX_W;
5746}
5747
// Byte and word element sizes of vptest (BWI-only instructions).  The 512-bit
// forms require HasBWI, 128/256-bit EVEX forms additionally require VLX, and
// without VLX the *_Alt patterns widen to the 512-bit b/w instructions.
// NOTE(review): the Alt patterns are guarded by [HasAVX512, NoVLX] yet target
// the HasBWI-only 512-bit instructions — confirm HasBWI is implied on every
// path that can select them.
5748multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5749                            PatFrag OpNode, X86SchedWriteWidths sched> {
5750  let Predicates = [HasBWI] in {
5751  defm WZ:    avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.ZMM,
5752                            v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
5753  defm BZ:    avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.ZMM,
5754                            v64i8_info, NAME#"B">, EVEX_V512;
5755  }
5756  let Predicates = [HasVLX, HasBWI] in {
5757
5758  defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.YMM,
5759                            v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
5760  defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.XMM,
5761                            v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
5762  defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.YMM,
5763                            v32i8x_info, NAME#"B">, EVEX_V256;
5764  defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.XMM,
5765                            v16i8x_info, NAME#"B">, EVEX_V128;
5766  }
5767
5768  let Predicates = [HasAVX512, NoVLX] in {
5769  defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, NAME#"B">;
5770  defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, NAME#"B">;
5771  defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, NAME#"W">;
5772  defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, NAME#"W">;
5773  }
5774}
5775
5776// These patterns are used to match vptestm/vptestnm. We don't treat pcmpeqm
5777// as commutable here because we already canonicalized all zeros vectors to the
5778// RHS during lowering.
5779def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
5780                         (setcc node:$src1, node:$src2, SETEQ)>;
5781def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
5782                         (setcc node:$src1, node:$src2, SETNE)>;
5783
// Combines the byte/word (opc_wb) and dword/qword (opc_dq) variants so each
// VPTEST* family is declared with a single defm.
5784multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5785                                   PatFrag OpNode, X86SchedWriteWidths sched> :
5786  avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, sched>,
5787  avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, sched>;
5788
// vptestm sets the mask bit when (src1 & src2) != 0, hence SETNE; vptestnm is
// the SETEQ counterpart.  They share opcodes and differ only in prefix
// (T8PD vs. T8XS).
5789defm VPTESTM   : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
5790                                         SchedWriteVecLogic>, T8PD;
5791defm VPTESTNM  : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
5792                                         SchedWriteVecLogic>, T8XS;
5793
5794//===----------------------------------------------------------------------===//
5795// AVX-512  Shift instructions
5796//===----------------------------------------------------------------------===//
5797
// Shift/rotate by an 8-bit immediate: register (ri) and full-vector memory
// (mi) forms.  ImmFormR/ImmFormM carry the ModRM /reg encoding slot
// (e.g. MRM2r/MRM2m for vpsrl) since these opcodes multiplex several
// operations.
5798multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5799                            string OpcodeStr, SDNode OpNode,
5800                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5801  let ExeDomain = _.ExeDomain in {
5802  defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5803                   (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5804                      "$src2, $src1", "$src1, $src2",
5805                   (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>,
5806                   Sched<[sched]>;
5807  defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5808                   (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5809                       "$src2, $src1", "$src1, $src2",
5810                   (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
5811                          (i8 imm:$src2)))>,
5812                   Sched<[sched.Folded]>;
5813  }
5814}
5815
// Immediate shift/rotate with a broadcast memory source (mbi form, EVEX.b):
// one scalar element is loaded, broadcast, then shifted by the immediate.
5816multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5817                             string OpcodeStr, SDNode OpNode,
5818                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5819  let ExeDomain = _.ExeDomain in
5820  defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5821                   (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5822      "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
5823     (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>,
5824     EVEX_B, Sched<[sched.Folded]>;
5825}
5826
// Shift by a count held in an XMM register (or a 128-bit memory operand):
// the VPSLL/VPSRL/VPSRA "shift by vector count" forms.  SrcVT is the 128-bit
// count type; bc_frag bitconverts the loaded v2i64 to it.
5827multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5828                            X86FoldableSchedWrite sched, ValueType SrcVT,
5829                            PatFrag bc_frag, X86VectorVTInfo _> {
5830   // src2 is always 128-bit
5831  let ExeDomain = _.ExeDomain in {
5832  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5833                   (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5834                      "$src2, $src1", "$src1, $src2",
5835                   (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5836                   AVX512BIBase, EVEX_4V, Sched<[sched]>;
5837  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5838                   (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5839                       "$src2, $src1", "$src1, $src2",
5840                   (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2))))>,
5841                   AVX512BIBase,
5842                   EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
5843  }
5844}
5845
// Instantiates the shift-by-xmm-count forms across vector widths.  The memory
// operand is always 128-bit, so the compressed-displacement tuple differs per
// width: CD8VQ (quarter) at 512, CD8VH (half) at 256, CD8VF (full) at 128.
5846multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5847                              X86SchedWriteWidths sched, ValueType SrcVT,
5848                              PatFrag bc_frag, AVX512VLVectorVTInfo VTInfo,
5849                              Predicate prd> {
5850  let Predicates = [prd] in
5851  defm Z    : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5852                               bc_frag, VTInfo.info512>, EVEX_V512,
5853                               EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5854  let Predicates = [prd, HasVLX] in {
5855  defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5856                               bc_frag, VTInfo.info256>, EVEX_V256,
5857                               EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5858  defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5859                               bc_frag, VTInfo.info128>, EVEX_V128,
5860                               EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5861  }
5862}
5863
// One shift family (sll/srl/sra) across the d/q/w element sizes, each with
// its own opcode.  The w forms require BWI.  NotEVEX2VEXConvertibleQ is set
// only for VPSRAQ, which has no VEX equivalent.
5864multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5865                              string OpcodeStr, SDNode OpNode,
5866                              X86SchedWriteWidths sched,
5867                              bit NotEVEX2VEXConvertibleQ = 0> {
5868  defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5869                              bc_v4i32, avx512vl_i32_info, HasAVX512>;
5870  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5871  defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5872                              bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W;
5873  defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5874                              bc_v2i64, avx512vl_i16_info, HasBWI>;
5875}
5876
// Immediate shift/rotate (plus broadcast form) instantiated across vector
// widths; 128/256-bit EVEX encodings require VLX.
5877multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5878                                  string OpcodeStr, SDNode OpNode,
5879                                  X86SchedWriteWidths sched,
5880                                  AVX512VLVectorVTInfo VTInfo> {
5881  let Predicates = [HasAVX512] in
5882  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5883                              sched.ZMM, VTInfo.info512>,
5884             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5885                               VTInfo.info512>, EVEX_V512;
5886  let Predicates = [HasAVX512, HasVLX] in {
5887  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5888                              sched.YMM, VTInfo.info256>,
5889             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5890                               VTInfo.info256>, EVEX_V256;
5891  defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5892                              sched.XMM, VTInfo.info128>,
5893             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5894                               VTInfo.info128>, EVEX_V128;
5895  }
5896}
5897
// Word-element immediate shifts (BWI-only).  No broadcast (rmbi) form exists
// for word shifts, and VEX_WIG marks W as an ignored encoding bit.
5898multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5899                              string OpcodeStr, SDNode OpNode,
5900                              X86SchedWriteWidths sched> {
5901  let Predicates = [HasBWI] in
5902  defm WZ:    avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5903                               sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
5904  let Predicates = [HasVLX, HasBWI] in {
5905  defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5906                               sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
5907  defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5908                               sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
5909  }
5910}
5911
// Dword/qword immediate shifts sharing an opcode pair; the q variant adds
// VEX_W and may be flagged not EVEX->VEX convertible (used for vpsraq).
5912multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5913                               Format ImmFormR, Format ImmFormM,
5914                               string OpcodeStr, SDNode OpNode,
5915                               X86SchedWriteWidths sched,
5916                               bit NotEVEX2VEXConvertibleQ = 0> {
5917  defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5918                                 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5919  let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5920  defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5921                                 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
5922}
5923
// Immediate-count shifts and rotates.  The d/q forms multiplex opcodes
// 0x72/0x73 via the ModRM /reg field (MRM2* = srl, MRM6* = sll, MRM4* = sra,
// MRM0* = ror, MRM1* = rol); the w forms use opcode 0x71.
5924defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5925                                 SchedWriteVecShiftImm>,
5926             avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5927                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5928
5929defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5930                                 SchedWriteVecShiftImm>,
5931             avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5932                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5933
// The extra '1' keeps VPSRAQ out of the EVEX->VEX pass (no VEX form exists).
5934defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5935                                 SchedWriteVecShiftImm, 1>,
5936             avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5937                                SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5938
5939defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5940                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5941defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5942                                 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5943
// Shift-by-xmm-count families (uniform count taken from the low element).
5944defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
5945                                SchedWriteVecShift>;
5946defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
5947                                SchedWriteVecShift, 1>;
5948defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
5949                                SchedWriteVecShift>;
5950
5951// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Without VLX there is no 128/256-bit VPSRAQ encoding, so widen the source
// into a zmm register (upper lanes IMPLICIT_DEF), run the 512-bit VPSRAQZ
// form, and extract the low subregister.
5952let Predicates = [HasAVX512, NoVLX] in {
5953  def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5954            (EXTRACT_SUBREG (v8i64
5955              (VPSRAQZrr
5956                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5957                 VR128X:$src2)), sub_ymm)>;
5958
5959  def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5960            (EXTRACT_SUBREG (v8i64
5961              (VPSRAQZrr
5962                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5963                 VR128X:$src2)), sub_xmm)>;
5964
5965  def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
5966            (EXTRACT_SUBREG (v8i64
5967              (VPSRAQZri
5968                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5969                 imm:$src2)), sub_ymm)>;
5970
5971  def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
5972            (EXTRACT_SUBREG (v8i64
5973              (VPSRAQZri
5974                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5975                 imm:$src2)), sub_xmm)>;
5976}
5977
5978//===-------------------------------------------------------------------===//
5979// Variable Bit Shifts
5980//===-------------------------------------------------------------------===//
5981
// Per-element ("variable") shift with an independent count per lane:
// register-register (rr) and full-vector memory (rm) forms.  Also reused by
// the 1-src permute instantiations further down, since they share the same
// operand shape.
5982multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5983                            X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5984  let ExeDomain = _.ExeDomain in {
5985  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5986                   (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5987                      "$src2, $src1", "$src1, $src2",
5988                   (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
5989                   AVX5128IBase, EVEX_4V, Sched<[sched]>;
5990  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5991                   (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5992                       "$src2, $src1", "$src1, $src2",
5993                   (_.VT (OpNode _.RC:$src1,
5994                   (_.VT (bitconvert (_.LdFrag addr:$src2)))))>,
5995                   AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5996                   Sched<[sched.Folded, ReadAfterLd]>;
5997  }
5998}
5999
// Broadcast-memory (rmb) companion to avx512_var_shift: the per-lane count
// (or permute index) vector comes from one broadcast scalar element.
6000multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6001                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6002  let ExeDomain = _.ExeDomain in
6003  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6004                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6005                    "${src2}"##_.BroadcastStr##", $src1",
6006                    "$src1, ${src2}"##_.BroadcastStr,
6007                    (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
6008                                                (_.ScalarLdFrag addr:$src2)))))>,
6009                    AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6010                    Sched<[sched.Folded, ReadAfterLd]>;
6011}
6012
// Variable shift across vector widths: 512-bit with AVX512, 128/256-bit only
// with VLX (no Alt patterns here; see avx512_var_shift_lowering for NoVLX).
6013multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6014                                  X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6015  let Predicates  = [HasAVX512] in
6016  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6017           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6018
6019  let Predicates = [HasAVX512, HasVLX] in {
6020  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6021              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6022  defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6023              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
6024  }
6025}
6026
// Dword/qword element sizes of a variable-shift family; shared opcode,
// W bit selects q.
6027multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6028                                  SDNode OpNode, X86SchedWriteWidths sched> {
6029  defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6030                                 avx512vl_i32_info>;
6031  defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6032                                 avx512vl_i64_info>, VEX_W;
6033}
6034
6035// Use 512bit version to implement 128/256 bit in case NoVLX.
// NoVLX fallback for 128/256-bit variable shifts: widen both operands into
// zmm (upper lanes IMPLICIT_DEF), run the 512-bit "Zrr" instruction, extract
// the low subregister.  Instantiated with predicate list p by the defm's
// below.
6036multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6037                                     SDNode OpNode, list<Predicate> p> {
6038  let Predicates = p in {
6039  def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6040                                  (_.info256.VT _.info256.RC:$src2))),
6041            (EXTRACT_SUBREG
6042                (!cast<Instruction>(OpcodeStr#"Zrr")
6043                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6044                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6045             sub_ymm)>;
6046
6047  def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6048                                  (_.info128.VT _.info128.RC:$src2))),
6049            (EXTRACT_SUBREG
6050                (!cast<Instruction>(OpcodeStr#"Zrr")
6051                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6052                    (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6053             sub_xmm)>;
6054  }
6055}
// Word-element variable shifts (BWI-only); no broadcast form for words.
6056multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6057                              SDNode OpNode, X86SchedWriteWidths sched> {
6058  let Predicates = [HasBWI] in
6059  defm WZ:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6060              EVEX_V512, VEX_W;
6061  let Predicates = [HasVLX, HasBWI] in {
6062
6063  defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6064              EVEX_V256, VEX_W;
6065  defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
6066              EVEX_V128, VEX_W;
6067  }
6068}
6069
// Variable shift and rotate families: generic ISD shift nodes (shl/sra/srl,
// rotl/rotr) give per-element counts; the w variants use separate opcodes.
6070defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SchedWriteVarVecShift>,
6071              avx512_var_shift_w<0x12, "vpsllvw", shl, SchedWriteVarVecShift>;
6072
6073defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SchedWriteVarVecShift>,
6074              avx512_var_shift_w<0x11, "vpsravw", sra, SchedWriteVarVecShift>;
6075
6076defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SchedWriteVarVecShift>,
6077              avx512_var_shift_w<0x10, "vpsrlvw", srl, SchedWriteVarVecShift>;
6078
6079defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6080defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6081
// 128/256-bit fallbacks via the 512-bit instruction for forms that have no
// narrow encoding without VLX (VPSRAVQ) or without BWI+VLX (the vw forms).
6082defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
6083defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
6084defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
6085defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
6086
6087// Special handling for the VPSRAV intrinsics.
// Selects the X86vsrav node (produced from the VPSRAV intrinsics) onto the
// existing instruction records for one vector width: plain, merge-masked
// (vselect with $src0 passthru) and zero-masked forms, each in register and
// memory flavors.
6088multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
6089                                         list<Predicate> p> {
6090  let Predicates = p in {
6091    def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
6092              (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
6093               _.RC:$src2)>;
6094    def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
6095              (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
6096               _.RC:$src1, addr:$src2)>;
6097    def : Pat<(_.VT (vselect _.KRCWM:$mask,
6098                     (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
6099              (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
6100               _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
6101    def : Pat<(_.VT (vselect _.KRCWM:$mask,
6102                     (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
6103                     _.RC:$src0)),
6104              (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
6105               _.KRC:$mask, _.RC:$src1, addr:$src2)>;
6106    def : Pat<(_.VT (vselect _.KRCWM:$mask,
6107                     (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
6108              (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
6109               _.RC:$src1, _.RC:$src2)>;
6110    def : Pat<(_.VT (vselect _.KRCWM:$mask,
6111                     (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
6112                     _.ImmAllZerosV)),
6113              (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
6114               _.RC:$src1, addr:$src2)>;
6115  }
6116}
6117
// Extends avx512_var_shift_int_lowering with the broadcast-memory (rmb)
// flavors; used for the d/q element sizes, which have broadcast encodings.
6118multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
6119                                         list<Predicate> p> :
6120           avx512_var_shift_int_lowering<InstrStr, _, p> {
6121  let Predicates = p in {
6122    def : Pat<(_.VT (X86vsrav _.RC:$src1,
6123                     (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
6124              (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
6125               _.RC:$src1, addr:$src2)>;
6126    def : Pat<(_.VT (vselect _.KRCWM:$mask,
6127                     (X86vsrav _.RC:$src1,
6128                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
6129                     _.RC:$src0)),
6130              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
6131               _.KRC:$mask, _.RC:$src1, addr:$src2)>;
6132    def : Pat<(_.VT (vselect _.KRCWM:$mask,
6133                     (X86vsrav _.RC:$src1,
6134                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
6135                     _.ImmAllZerosV)),
6136              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
6137               _.RC:$src1, addr:$src2)>;
6138  }
6139}
6140
// Hook the X86vsrav lowerings onto each width/element-size combination; word
// forms have no broadcast encoding, so they use the non-mb multiclass.
6141defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
6142defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
6143defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
6144defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
6145defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
6146defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
6147defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
6148defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
6149defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
6150
6151// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Without VLX, rotate-left on 128/256-bit vectors is done by widening into a
// zmm register (upper lanes IMPLICIT_DEF), running the 512-bit variable
// (VPROLV*Zrr) or immediate (VPROL*Zri) rotate, and extracting the low
// subregister.
6152let Predicates = [HasAVX512, NoVLX] in {
6153  def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6154            (EXTRACT_SUBREG (v8i64
6155              (VPROLVQZrr
6156                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6157                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6158                       sub_xmm)>;
6159  def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6160            (EXTRACT_SUBREG (v8i64
6161              (VPROLVQZrr
6162                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6163                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6164                       sub_ymm)>;
6165
6166  def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6167            (EXTRACT_SUBREG (v16i32
6168              (VPROLVDZrr
6169                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6170                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6171                        sub_xmm)>;
6172  def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6173            (EXTRACT_SUBREG (v16i32
6174              (VPROLVDZrr
6175                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6176                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6177                        sub_ymm)>;
6178
6179  def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
6180            (EXTRACT_SUBREG (v8i64
6181              (VPROLQZri
6182                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6183                        imm:$src2)), sub_xmm)>;
6184  def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
6185            (EXTRACT_SUBREG (v8i64
6186              (VPROLQZri
6187                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6188                       imm:$src2)), sub_ymm)>;
6189
6190  def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
6191            (EXTRACT_SUBREG (v16i32
6192              (VPROLDZri
6193                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6194                        imm:$src2)), sub_xmm)>;
6195  def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
6196            (EXTRACT_SUBREG (v16i32
6197              (VPROLDZri
6198                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6199                        imm:$src2)), sub_ymm)>;
6200}
6201
6202// Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Rotate-right mirror of the VPROL NoVLX block above: widen to zmm, use the
// 512-bit variable (VPRORV*Zrr) or immediate (VPROR*Zri) rotate, then
// extract the low subregister.
6203let Predicates = [HasAVX512, NoVLX] in {
6204  def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6205            (EXTRACT_SUBREG (v8i64
6206              (VPRORVQZrr
6207                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6208                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6209                       sub_xmm)>;
6210  def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6211            (EXTRACT_SUBREG (v8i64
6212              (VPRORVQZrr
6213                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6214                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6215                       sub_ymm)>;
6216
6217  def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6218            (EXTRACT_SUBREG (v16i32
6219              (VPRORVDZrr
6220                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6221                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6222                        sub_xmm)>;
6223  def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6224            (EXTRACT_SUBREG (v16i32
6225              (VPRORVDZrr
6226                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6227                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6228                        sub_ymm)>;
6229
6230  def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
6231            (EXTRACT_SUBREG (v8i64
6232              (VPRORQZri
6233                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6234                        imm:$src2)), sub_xmm)>;
6235  def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
6236            (EXTRACT_SUBREG (v8i64
6237              (VPRORQZri
6238                (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6239                       imm:$src2)), sub_ymm)>;
6240
6241  def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
6242            (EXTRACT_SUBREG (v16i32
6243              (VPRORDZri
6244                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6245                        imm:$src2)), sub_xmm)>;
6246  def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
6247            (EXTRACT_SUBREG (v16i32
6248              (VPRORDZri
6249                (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6250                        imm:$src2)), sub_ymm)>;
6251}
6252
6253//===-------------------------------------------------------------------===//
6254// 1-src variable permutation VPERMW/D/Q
6255//===-------------------------------------------------------------------===//
6256
// Dword/qword variable permute: 512-bit (Z) form is available with plain
// AVX512; the 256-bit (Z256) form additionally requires VLX. Reuses the
// variable-shift multiclasses for the encodings, including the
// broadcast-memory form (avx512_var_shift_mb). Note that no 128-bit
// variant is instantiated here.
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates  = [HasAVX512] in
  defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
           avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
}
6267
// Immediate-controlled permute (VPERMQ/VPERMPD $imm): 512-bit form with
// AVX512, 256-bit form with AVX512+VLX. Each size gets register/memory
// immediate forms (avx512_shift_rmi) plus the broadcast-memory immediate
// form (avx512_shift_rmbi).
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
                                 string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasAVX512] in
  defm Z:    avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info512>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info512>, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in
  defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
                              sched, VTInfo.info256>,
             avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
                               sched, VTInfo.info256>, EVEX_V256;
}
6282
// Byte/word variable permute (used for VPERMB/VPERMW): the 512-bit form is
// gated on the caller-supplied feature predicate (prd); 128/256-bit forms
// additionally require VLX. Only register/memory forms are instantiated —
// there is no broadcast-memory variant here.
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
                              Predicate prd, SDNode OpNode,
                              X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
  let Predicates = [prd] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
              EVEX_V512 ;
  let Predicates = [HasVLX, prd] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
              EVEX_V256 ;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
              EVEX_V128 ;
  }
}
6296
// Byte/word variable permutes: VPERMW requires BWI, VPERMB requires VBMI.
// Both share opcode 0x8D; VEX_W distinguishes the word form.
defm VPERMW  : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
defm VPERMB  : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
                               WriteVarShuffle256, avx512vl_i8_info>;

// Dword/qword and FP variable permutes: integer forms use opcode 0x36,
// FP forms use 0x16; VEX_W selects the 64-bit element variants.
defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
                                    WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
                                     WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;

// Immediate-controlled qword permutes (VPERMQ/VPERMPD $imm).
defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
                             X86VPermi, WriteShuffle256, avx512vl_i64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
                             X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
                             EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6317
6318//===----------------------------------------------------------------------===//
6319// AVX-512 - VPERMIL
6320//===----------------------------------------------------------------------===//
6321
// Variable VPERMILPS/PD: an FP data vector (_) permuted by a same-width
// integer control vector (Ctrl). Defines maskable register (rr), memory
// (rm) and broadcast-memory (rmb) forms.
multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _,
                             X86VectorVTInfo Ctrl> {
  defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode _.RC:$src1,
                               (Ctrl.VT Ctrl.RC:$src2)))>,
                  T8PD, EVEX_4V, Sched<[sched]>;
  // Control vector loaded from memory.
  defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
                  "$src2, $src1", "$src1, $src2",
                  (_.VT (OpNode
                           _.RC:$src1,
                           (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
                  T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
                  Sched<[sched.Folded, ReadAfterLd]>;
  // Control formed by broadcasting a scalar load (EVEX.b form).
  defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                   "${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr,
                   (_.VT (OpNode
                            _.RC:$src1,
                            (Ctrl.VT (X86VBroadcast
                                       (Ctrl.ScalarLdFrag addr:$src2)))))>,
                   T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
                   Sched<[sched.Folded, ReadAfterLd]>;
}
6350
// Instantiates the variable-control VPERMILP forms at 512 bits (AVX512)
// and at 128/256 bits (AVX512 + VLX), all selecting via X86VPermilpv.
multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
                                    X86SchedWriteWidths sched,
                                    AVX512VLVectorVTInfo _,
                                    AVX512VLVectorVTInfo Ctrl> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
                                  _.info512, Ctrl.info512>, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
                                  _.info128, Ctrl.info128>, EVEX_V128;
    defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
                                  _.info256, Ctrl.info256>, EVEX_V256;
  }
}
6366
// Combines both VPERMILP flavors under one name: the variable form (OpcVar,
// vector control operand) and the immediate form (OpcImm, selected via
// X86VPermilpi through the shift_rmi machinery).
multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
                         AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
  defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
                                      _, Ctrl>;
  defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
                                    X86VPermilpi, SchedWriteFShuffle, _>,
                    EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}

// VPERMILPS: imm opcode 0x04, variable opcode 0x0C; single-precision domain.
let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
                               avx512vl_i32_info>;
// VPERMILPD: imm opcode 0x05, variable opcode 0x0D; double-precision domain.
let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
                               avx512vl_i64_info>, VEX_W1X;
6382
6383//===----------------------------------------------------------------------===//
6384// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6385//===----------------------------------------------------------------------===//
6386
// All three reuse opcode 0x70 with different prefixes: VPSHUFD via the
// dword immediate-shift multiclass (66 prefix), VPSHUFHW with the XS
// prefix, VPSHUFLW with the XD prefix.
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
                             X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
                             EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
                                  X86PShufhw, SchedWriteShuffle>,
                                  EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
                                  X86PShuflw, SchedWriteShuffle>,
                                  EVEX, AVX512XDIi8Base;
6396
6397//===----------------------------------------------------------------------===//
6398// AVX-512 - VPSHUFB
6399//===----------------------------------------------------------------------===//
6400
// VPSHUFB byte shuffle: 512-bit form requires BWI; 128/256-bit forms
// additionally require VLX.
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched> {
  let Predicates = [HasBWI] in
  defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
                              EVEX_V512;

  let Predicates = [HasVLX, HasBWI] in {
  defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
                              EVEX_V256;
  defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
                              EVEX_V128;
  }
}

defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
                                  SchedWriteVarShuffle>, VEX_WIG;
6417
6418//===----------------------------------------------------------------------===//
6419// Move Low to High and High to Low packed FP Instructions
6420//===----------------------------------------------------------------------===//
6421
// EVEX-encoded VMOVLHPS / VMOVHLPS (register-register forms only).
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
// Marked commutable so the sources can be swapped during two-address /
// commute optimization; NotMemoryFoldable keeps it out of the
// memory-folding tables.
let isCommutable = 1 in
def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
          (ins VR128X:$src1, VR128X:$src2),
          "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
          Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6433
6434//===----------------------------------------------------------------------===//
6435// VMOVHPS/PD VMOVLPS Instructions
// All patterns were taken from the SSE implementation.
6437//===----------------------------------------------------------------------===//
6438
// Load form shared by VMOVHPS/HPD/LPS/LPD: loads a 64-bit scalar from
// memory and combines it with $src1 via the caller-supplied OpNode.
// hasSideEffects = 0 / mayLoad = 1 are set explicitly because some
// instantiations pass null_frag and so carry no pattern.
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
                                  SDPatternOperator OpNode,
                                  X86VectorVTInfo _> {
  let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
  def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
                  (ins _.RC:$src1, f64mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set _.RC:$dst,
                     (OpNode _.RC:$src1,
                       (_.VT (bitconvert
                         (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
                  Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>, EVEX_4V;
}
6453
// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
// SSE1. And MOVLPS pattern is even more complex.
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
                                  v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
                                  v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  // Also match the unpckl when the loaded 64 bits arrive as an i64
  // scalar_to_vector bitcast to v2f64 (same memory access, integer type).
  def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
                    (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
           (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
}
6471
// Store forms: write one 64-bit half of an xmm register to memory.
let SchedRW = [WriteFStore] in {
// High-half stores: modeled as unpckh of the source with itself, then a
// store of element 0 (i.e. the original high 64 bits).
def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovhps\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt
                                     (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
                                                (bc_v2f64 (v4f32 VR128X:$src))),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovhpd\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt
                                     (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
// Low-half stores: store element 0 directly.
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovlps\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<32, CD8VT2>;
def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
                       (ins f64mem:$dst, VR128X:$src),
                       "vmovlpd\t{$src, $dst|$dst, $src}",
                       [(store (f64 (extractelt (v2f64 VR128X:$src),
                                     (iPTR 0))), addr:$dst)]>,
                       EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
} // SchedRW

let Predicates = [HasAVX512] in {
  // VMOVHPD patterns
  // Storing element 0 of (vpermilpd $src, 1) is a store of $src's high
  // half, which VMOVHPD does directly.
  def : Pat<(store (f64 (extractelt
                           (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
                           (iPTR 0))), addr:$dst),
           (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
}
6509//===----------------------------------------------------------------------===//
6510// FMA - Fused Multiply Operations
6511//
6512
// Packed FMA, 213 form: the tied operand $src1 appears as the middle
// operand of OpNode. Register (r), memory (m) and broadcast-memory (mb)
// variants, all maskable with $src1 tied to $dst.
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;

  // $src3 supplied as a broadcast scalar load (EVEX.b).
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
             _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
             AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
6538
// 213 form with static rounding control: register operands only, with an
// extra AVX512RC rounding-mode operand encoded via EVEX.b + EVEX_RC.
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
6549
// Instantiates the 213 form at 512 bits (including the rounding variant)
// and, under VLX, at 256/128 bits (no rounding variants at those widths).
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, Suff>,
                  avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// Single/double-precision packed instantiations for one 213-form opcode.
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd> {
    defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
                                      VEX_W;
}
6578
// Packed 213-form FMA instruction families.
defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
6585
6586
// Packed FMA, 231 form: the tied operand $src1 appears as the last operand
// of OpNode. Register, memory and broadcast-memory variants; the register
// and broadcast forms pass the extra commutation arguments (vselect, 1).
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
          vselect, 1>, AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;

  // $src3 supplied as a broadcast scalar load (EVEX.b).
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.ScalarMemOp:$src3),
         OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
         "$src2, ${src3}"##_.BroadcastStr,
         (_.VT (OpNode _.RC:$src2,
                      (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                      _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
         Sched<[sched.Folded, ReadAfterLd]>;
  }
}
6613
// 231 form with static rounding control (register operands only,
// EVEX.b + EVEX_RC encode the rounding mode).
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
          1, 1, vselect, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
6625
// Instantiates the 231 form at 512 bits (including rounding) and, under
// VLX, at 256/128 bits.
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, Suff>,
                  avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// Single/double-precision packed instantiations for one 231-form opcode.
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
    defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
                                      VEX_W;
}
6654
// Packed 231-form FMA instruction families.
defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
6661
// Packed FMA, 132 form: the tied operand $src1 is the first OpNode operand
// in the register pattern; the memory patterns are written in 312 order
// (see comments below) to keep them distinct from the 213/231 patterns.
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched,
                               X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
          AVX512FMA3Base, Sched<[sched]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
          AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;

  // Pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.ScalarMemOp:$src3),
         OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
         "$src2, ${src3}"##_.BroadcastStr,
         (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
                       _.RC:$src1, _.RC:$src2)), 1, 0>,
         AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
6691
// 132 form with static rounding control (register operands only,
// EVEX.b + EVEX_RC encode the rounding mode).
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86FoldableSchedWrite sched,
                                 X86VectorVTInfo _, string Suff> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
  defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
          OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
          (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
          1, 1, vselect, 1>,
          AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
}
6703
// Instantiates the 132 form at 512 bits (including rounding) and, under
// VLX, at 256/128 bits.
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                   SDNode OpNodeRnd, X86SchedWriteWidths sched,
                                   AVX512VLVectorVTInfo _, string Suff> {
  let Predicates = [HasAVX512] in {
    defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
                                      _.info512, Suff>,
                  avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
                                        _.info512, Suff>,
                              EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
                                    _.info256, Suff>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
                                    _.info128, Suff>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}

// Single/double-precision packed instantiations for one 132-form opcode.
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              SDNode OpNodeRnd > {
    defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f32_info, "PS">;
    defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
                                      SchedWriteFMA, avx512vl_f64_info, "PD">,
                                      VEX_W;
}
6732
// Packed 132-form FMA instruction families.
defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
6739
// Scalar FMA
// Scalar FMA building block. The *_Int forms operate on full XMM registers
// and carry no ISel patterns (null_frag — selection is done elsewhere); the
// isCodeGenOnly forms operate on scalar FRC registers and use the RHS_*
// patterns supplied by the caller. When MaskOnlyReg is set, the register
// and rounding codegen-only forms get empty pattern lists.
multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                               dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
  defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;

  let mayLoad = 1 in
  defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
          "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
          AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;

  // Static-rounding intrinsic form (EVEX.b + EVEX_RC).
  defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
         (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
         OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
         AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;

  let isCodeGenOnly = 1, isCommutable = 1 in {
    def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
                     !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
    def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
                    (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
                    !strconcat(OpcodeStr,
                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                    [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>;

    def rb    : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
                     (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
                     !strconcat(OpcodeStr,
                              "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                     !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
                     Sched<[SchedWriteFMA.Scl]>;
  }// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}
6781
// Instantiates the 213, 231 and 132 scalar forms for one element type,
// supplying the register/memory/rounding patterns with the operand order
// each form requires.
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                            string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
                            X86VectorVTInfo _, string SUFF> {
  let ExeDomain = _.ExeDomain in {
  defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
                // Operands for intrinsic are in 123 order to preserve passthru
                // semantics.
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
                         (_.ScalarLdFrag addr:$src3)))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
                         _.FRC:$src3, (i32 imm:$rc)))), 0>;

  defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
                                          _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
                            (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
                         _.FRC:$src1, (i32 imm:$rc)))), 1>;

  // One pattern is 312 order so that the load is in a different place from the
  // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
  defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
                (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
                                 _.FRC:$src1, _.FRC:$src2))),
                (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
                         _.FRC:$src2, (i32 imm:$rc)))), 1>;
  }
}
6815
// Top level for a scalar FMA opcode family: instantiate all three operand
// orders for both f32 ("SS") and f64 ("SD") under the HasAVX512 predicate.
// The f64 variants additionally carry VEX_W.
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
                        string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
  let Predicates = [HasAVX512] in {
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f32x_info, "SS">,
                                 EVEX_CD8<32, CD8VT1>, VEX_LIG;
    defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
                                 OpNodeRnd, f64x_info, "SD">,
                                 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
  }
}
6827
// Scalar FMA instruction families.  Each triple of opcodes is the
// (213, 231, 132) encoding for that mnemonic.
defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
6832
// Selection patterns mapping scalar FMA DAGs of the shape
//   (Move xmm:$src1, (scalar_to_vector (Op ...)))
// onto the *_Int instruction forms.  The position of the operand extracted
// from element 0 of $src1 selects which operand-order form (213/231/132) is
// used.  Masked variants match X86selects with either element 0 of $src1
// (merge masking, "_Intk") or ZeroFP (zero masking, "_Intkz") as the
// pass-through value.  RndOp handles the explicit-rounding ("Zrb") forms.
multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
                                      string Suffix, SDNode Move,
                                      X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // Unmasked patterns: register forms first, then load-folding forms.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               addr:$src3)>;

    // Merge-masked patterns: when the mask bit is clear the result keeps
    // element 0 of $src1.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Zero-masked patterns: when the mask bit is clear the result element
    // becomes ZeroFP.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src3),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2, _.FRC:$src3,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2,
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    (_.ScalarLdFrag addr:$src3)),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                    _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
                    (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;

    // Patterns with rounding mode.  These match RndOp (which carries an
    // explicit i32 rounding-control immediate) onto the "Zrb" forms.
    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 imm:$rc)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 imm:$rc)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
               VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 imm:$rc)),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 imm:$rc)),
                (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (RndOp _.FRC:$src2,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       _.FRC:$src3, (i32 imm:$rc)),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;

    def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
               (X86selects VK1WM:$mask,
                (RndOp _.FRC:$src2, _.FRC:$src3,
                       (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                       (i32 imm:$rc)),
                (_.EltVT ZeroFP)))))),
              (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
               VR128X:$src1, VK1WM:$mask,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
               (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
  }
}
7034
// Instantiate the scalar FMA selection patterns for every opcode family,
// once per scalar type (SS = f32 via X86Movss, SD = f64 via X86Movsd).
defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
                                  X86Movss, v4f32x_info, fp32imm0>;

defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
                                  X86Movsd, v2f64x_info, fp64imm0>;
7052
7053//===----------------------------------------------------------------------===//
7054// AVX-512  Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7055//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
// One width-specific instantiation of an IFMA vpmadd52 instruction:
// register ("r"), load-folding ("m") and broadcast-folding ("mb") forms,
// all maskable and tied to $src1 as the accumulator.
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  // NOTE: The SDNode has the multiply operands first with the add last.
  // This enables commuted load patterns to be autogenerated by tablegen.
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
         AVX512FMA3Base, Sched<[sched]>;

  defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
          AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>;

  // Broadcast form: ${src3}{1toN} memory operand.
  defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (OpNode _.RC:$src2,
                    (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                    _.RC:$src1)>,
            AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
} // Constraints = "$src1 = $dst"
7085
// Instantiate a vpmadd52 instruction at all three vector widths: 512-bit
// under HasIFMA, and 256/128-bit additionally gated on HasVLX.
multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
  let Predicates = [HasIFMA] in {
    defm Z      : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                      EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
  }
  let Predicates = [HasVLX, HasIFMA] in {
    defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                      EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
    defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                      EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
  }
}
7099
// IFMA instruction definitions: low (luq) and high (huq) 52-bit
// multiply-add over i64 vector elements.
defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
                                         SchedWriteVecIMul, avx512vl_i64_info>,
                                         VEX_W;
7106
7107//===----------------------------------------------------------------------===//
7108// AVX-512  Scalar convert from sign integer to float/double
7109//===----------------------------------------------------------------------===//
7110
// Scalar integer -> float/double conversion.  The "rr"/"rm" forms use FRC
// destinations, have no patterns (hasSideEffects = 0) and exist for clean
// non-intrinsic codegen/assembly; the isCodeGenOnly "rr_Int"/"rm_Int" forms
// use the full vector register class and carry the intrinsic patterns with
// FROUND_CURRENT rounding.
multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched,
                    RegisterClass SrcRC, X86VectorVTInfo DstVT,
                    X86MemOperand x86memop, PatFrag ld_frag, string asm> {
  let hasSideEffects = 0 in {
    def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              EVEX_4V, Sched<[sched]>;
    let mayLoad = 1 in
      def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
              (ins DstVT.FRC:$src1, x86memop:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
              EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  } // hasSideEffects = 0
  let isCodeGenOnly = 1 in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, SrcRC:$src2),
                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1),
                                 SrcRC:$src2,
                                 (i32 FROUND_CURRENT)))]>,
                 EVEX_4V, Sched<[sched]>;

    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
                  (ins DstVT.RC:$src1, x86memop:$src2),
                  !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set DstVT.RC:$dst,
                        (OpNode (DstVT.VT DstVT.RC:$src1),
                                 (ld_frag addr:$src2),
                                 (i32 FROUND_CURRENT)))]>,
                  EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
  }//isCodeGenOnly = 1
}
7145
// Embedded-rounding variant of the scalar int -> fp conversion: "rrb_Int"
// takes an explicit AVX512RC rounding-control operand (EVEX.b set).
multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                               X86FoldableSchedWrite sched, RegisterClass SrcRC,
                               X86VectorVTInfo DstVT, string asm> {
  def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
              (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
              !strconcat(asm,
                  "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
              [(set DstVT.RC:$dst,
                    (OpNode (DstVT.VT DstVT.RC:$src1),
                             SrcRC:$src2,
                             (i32 imm:$rc)))]>,
              EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
7159
// Combine the current-rounding and embedded-rounding scalar conversion
// forms under one name, adding VEX_LIG to the whole group.
multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode,
                                X86FoldableSchedWrite sched,
                                RegisterClass SrcRC, X86VectorVTInfo DstVT,
                                X86MemOperand x86memop, PatFrag ld_frag, string asm> {
  defm NAME : avx512_vcvtsi_round<opc, OpNode, sched, SrcRC, DstVT, asm>,
              avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
                            ld_frag, asm>, VEX_LIG;
}
7168
let Predicates = [HasAVX512] in {
// Signed int -> float/double conversions (32- and 64-bit sources).
defm VCVTSI2SSZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR32,
                                 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
                                 XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR64,
                                 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
                                 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ  : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR32,
                                 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
                                 XD, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR64,
                                 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
                                 XD, VEX_W, EVEX_CD8<64, CD8VT1>;

// AT&T assembler accepts the suffix-less mnemonic for the memory forms.
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;

// sint_to_fp selection patterns; the tied first operand is undefined.
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
          (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
          (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (sint_to_fp GR32:$src)),
          (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (sint_to_fp GR64:$src)),
          (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (sint_to_fp GR32:$src)),
          (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (sint_to_fp GR64:$src)),
          (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;

// Unsigned int -> float/double conversions (AVX-512 only encodings).
// VCVTUSI2SDZ uses the non-rounding multiclass: i32 -> f64 is exact, so no
// embedded-rounding form is needed.
defm VCVTUSI2SSZ   : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR32,
                                  v4f32x_info, i32mem, loadi32,
                                  "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR64,
                                  v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
                                  XS, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ   : avx512_vcvtsi<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR32, v2f64x_info,
                                  i32mem, loadi32, "cvtusi2sd{l}">,
                                  XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR64,
                                  v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
                                  XD, VEX_W, EVEX_CD8<64, CD8VT1>;

def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
              (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;

// uint_to_fp selection patterns, mirroring the signed ones above.
def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
          (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
          (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;

def : Pat<(f32 (uint_to_fp GR32:$src)),
          (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f32 (uint_to_fp GR64:$src)),
          (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
def : Pat<(f64 (uint_to_fp GR32:$src)),
          (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(f64 (uint_to_fp GR64:$src)),
          (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}
7242
7243//===----------------------------------------------------------------------===//
7244// AVX-512  Scalar convert from float/double to integer
7245//===----------------------------------------------------------------------===//
7246
// Scalar float/double -> integer conversion with current ("rr_Int"),
// explicit ("rrb_Int") and memory ("rm_Int") forms, plus AT&T-suffix
// InstAliases.  CodeGenOnly controls whether the memory form is hidden
// from the assembler (it is re-exposed with an alias in the subclass
// below when CodeGenOnly = 0).
multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                  X86VectorVTInfo DstVT, SDNode OpNode,
                                  X86FoldableSchedWrite sched, string asm,
                                  string aliasStr,
                                  bit CodeGenOnly = 1> {
  let Predicates = [HasAVX512] in {
    def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))]>,
                EVEX, VEX_LIG, Sched<[sched]>;
    def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
                 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
                 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
                 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
                 Sched<[sched]>;
    let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
    def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
                !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                [(set DstVT.RC:$dst, (OpNode
                      (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
                      (i32 FROUND_CURRENT)))]>,
                EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;

    // AT&T aliases carrying the explicit operand-size suffix (aliasStr).
    def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
            (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
    def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
            (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
  } // Predicates = [HasAVX512]
}
7276
// Variant of avx512_cvt_s_int_round with CodeGenOnly = 0, so the memory
// form is assembler-visible, and with an extra AT&T alias for it.
multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
                                          X86VectorVTInfo DstVT, SDNode OpNode,
                                          X86FoldableSchedWrite sched, string asm,
                                          string aliasStr> :
  avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, sched, asm, aliasStr, 0> {
  let Predicates = [HasAVX512] in {
    def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
            (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
                                            SrcVT.IntScalarMemOp:$src), 0, "att">;
  } // Predicates = [HasAVX512]
}
7288
// Convert float/double to signed/unsigned int 32/64.
// Signed conversions (0x2D) use the plain multiclass; unsigned ones (0x79,
// AVX-512-only encodings) use the aliases variant.
defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
                                   X86cvts2si, WriteCvtSS2I, "cvtss2si", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
                                   X86cvts2si, WriteCvtSS2I, "cvtss2si", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info,
                                   X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{l}">,
                                   XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info,
                                   X86cvts2usi, WriteCvtSS2I, "cvtss2usi", "{q}">,
                                   XS, VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
                                   X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
                                   X86cvts2si, WriteCvtSD2I, "cvtsd2si", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ:   avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info,
                                   X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{l}">,
                                   XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info,
                                   X86cvts2usi, WriteCvtSD2I, "cvtsd2usi", "{q}">,
                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7314
// The SSE version of these instructions are disabled for AVX512.
// Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
// Both register and folded-load (sse_load_f32/f64) operands are covered.
let Predicates = [HasAVX512] in {
  def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
            (VCVTSS2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
            (VCVTSS2SIZrm_Int sse_load_f32:$src)>;
  def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
            (VCVTSS2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
            (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
            (VCVTSD2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
            (VCVTSD2SIZrm_Int sse_load_f64:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
            (VCVTSD2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
            (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>;
} // HasAVX512
7335
// Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
// which produce unnecessary vmovs{s,d} instructions. Each pattern folds a
// (X86Movss/X86Movsd dst, (scalar_to_vector (cvt src))) sequence directly
// into the corresponding AVX512 rr_Int/rm_Int conversion instruction.
let Predicates = [HasAVX512] in {
// Signed integer -> float/double.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
          (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
          (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
          (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
          (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;

// Unsigned integer -> float/double (AVX512-only VCVTUSI forms).
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
          (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
          (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
          (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
          (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
} // Predicates = [HasAVX512]
7419
// Convert float/double to signed/unsigned int 32/64 with truncation.
// Emits:
//  - rr/rm:       isCodeGenOnly forms operating on the scalar FP register
//                 class (FRC), selected from the OpNode DAG pattern.
//  - rr_Int/rm_Int/rrb_Int: intrinsic forms operating on the full vector
//                 register class, selected from OpNodeRnd; rrb_Int is the
//                 EVEX.b {sae} form.
//  - AT&T-suffix InstAliases (aliasStr, e.g. "{l}"/"{q}") for the
//                 register and {sae} intrinsic forms.
// CodeGenOnly controls whether rm_Int is kept out of the assembler tables
// (the unsigned variant below sets it to 0 and adds a memory-form alias).
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                            X86VectorVTInfo _DstRC, SDNode OpNode,
                            SDNode OpNodeRnd, X86FoldableSchedWrite sched,
                            string aliasStr, bit CodeGenOnly = 1>{
let Predicates = [HasAVX512] in {
  let isCodeGenOnly = 1 in {
  def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
              EVEX, Sched<[sched]>;
  def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
              EVEX, Sched<[sched.Folded, ReadAfterLd]>;
  }

  def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
           [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
                                 (i32 FROUND_CURRENT)))]>,
           EVEX, VEX_LIG, Sched<[sched]>;
  def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
            !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
            [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
                                  (i32 FROUND_NO_EXC)))]>,
                                  EVEX,VEX_LIG , EVEX_B, Sched<[sched]>;
  let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
  def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
              (ins _SrcRC.IntScalarMemOp:$src),
              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
              [(set _DstRC.RC:$dst, (OpNodeRnd
                                     (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src),
                                     (i32 FROUND_CURRENT)))]>,
              EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;

  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
  def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
          (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
} //HasAVX512
}
7462
// Unsigned variant of avx512_cvt_s_all: passes CodeGenOnly = 0 so the
// rm_Int memory form is kept in the assembler tables, and adds the
// corresponding AT&T-suffix memory-form InstAlias.
multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
                                     X86VectorVTInfo _SrcRC,
                                     X86VectorVTInfo _DstRC, SDNode OpNode,
                                     SDNode OpNodeRnd, X86FoldableSchedWrite sched,
                                     string aliasStr> :
  avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeRnd, sched,
                   aliasStr, 0> {
let Predicates = [HasAVX512] in {
  def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
          (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
                                          _SrcRC.IntScalarMemOp:$src), 0, "att">;
}
}
7476
// Truncating scalar FP -> signed int conversions (vcvttss2si/vcvttsd2si)
// for 32- and 64-bit destinations.
defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{l}">,
                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSS2I, "{q}">,
                        VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{l}">,
                        XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
                        fp_to_sint, X86cvtts2IntRnd, WriteCvtSD2I, "{q}">,
                        VEX_W, XD, EVEX_CD8<64, CD8VT1>;

// Truncating scalar FP -> unsigned int conversions (vcvttss2usi/vcvttsd2usi).
defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{l}">,
                        XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSS2I, "{q}">,
                        XS,VEX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{l}">,
                        XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
                        fp_to_uint, X86cvtts2UIntRnd, WriteCvtSD2I, "{q}">,
                        XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7502
// Map the SSE truncating cvtt{ss,sd}2si{,64} intrinsics onto the AVX512
// instructions (the SSE encodings are disabled when AVX512 is available).
// Note: the memory-form results name the operand bound by the
// sse_load_f32/sse_load_f64 ComplexPatterns, matching the non-truncating
// patterns above (previously spelled ssmem/sdmem; $src binding is the same).
let Predicates = [HasAVX512] in {
  def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
            (VCVTTSS2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
            (VCVTTSS2SIZrm_Int sse_load_f32:$src)>;
  def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
            (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
            (VCVTTSS2SI64Zrm_Int sse_load_f32:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
            (VCVTTSD2SIZrr_Int VR128X:$src)>;
  def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
            (VCVTTSD2SIZrm_Int sse_load_f64:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
            (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
  def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
            (VCVTTSD2SI64Zrm_Int sse_load_f64:$src)>;
} // HasAVX512
7521
7522//===----------------------------------------------------------------------===//
7523// AVX-512  Convert form float to double and back
7524//===----------------------------------------------------------------------===//
7525
// Scalar FP <-> FP conversion (e.g. vcvtsd2ss/vcvtss2sd).
// rr_Int/rm_Int are maskable intrinsic forms on the vector register class
// using the current rounding mode (FROUND_CURRENT); rr/rm are
// isCodeGenOnly, pattern-less forms on the scalar FP register class used
// for non-intrinsic selection elsewhere.
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                X86VectorVTInfo _Src, SDNode OpNode,
                                X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                       (_Src.VT _Src.RC:$src2),
                                       (i32 FROUND_CURRENT)))>,
                         EVEX_4V, VEX_LIG, Sched<[sched]>;
  defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (_.VT (OpNode (_.VT _.RC:$src1),
                                  (_Src.VT _Src.ScalarIntMemCPat:$src2),
                                  (i32 FROUND_CURRENT)))>,
                         EVEX_4V, VEX_LIG,
                         Sched<[sched.Folded, ReadAfterLd]>;

  let isCodeGenOnly = 1, hasSideEffects = 0 in {
    def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.FRC:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched]>;
    let mayLoad = 1 in
    def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
               OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               EVEX_4V, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
7557
// Scalar Conversion with SAE - suppress all exceptions (EVEX.b {sae} form,
// selected from OpNodeRnd with FROUND_NO_EXC).
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                    X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                    X86FoldableSchedWrite sched> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
                        "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2),
                                         (i32 FROUND_NO_EXC)))>,
                        EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
}
7570
// Scalar Conversion with rounding control (RC): EVEX.b form taking an
// explicit static-rounding-mode immediate operand ($rc).
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                                   X86VectorVTInfo _Src, SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched> {
  defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src2, $src1", "$src1, $src2, $rc",
                        (_.VT (OpNodeRnd (_.VT _.RC:$src1),
                                         (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
                        EVEX_4V, VEX_LIG, Sched<[sched]>,
                        EVEX_B, EVEX_RC;
}
// f64 -> f32 scalar conversion (vcvtsd2ss): base forms plus the
// rounding-control variant. Narrowing can round, so RC (not SAE) applies.
multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
                                  SDNode OpNodeRnd, X86FoldableSchedWrite sched,
                                  X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
             avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
                               OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
  }
}
7592
// f32 -> f64 scalar conversion (vcvtss2sd): base forms plus the SAE
// variant. Widening is exact, so only {sae} (not RC) applies.
multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
                                      X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _src, X86VectorVTInfo _dst> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
             avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
             EVEX_CD8<32, CD8VT1>, XS;
  }
}
// Instantiate the scalar float<->double conversions.
defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
                                         X86froundRnd, WriteCvtSD2SS, f64x_info,
                                         f32x_info>;
defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
                                          X86fpextRnd, WriteCvtSS2SD, f32x_info,
                                          f64x_info>;
7608
// Select scalar fpextend/fpround through the AVX512 conversion
// instructions. The IMPLICIT_DEF first operand supplies the (don't-care)
// merge source required by the two-operand instruction forms.
def : Pat<(f64 (fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;

// extload folded into the conversion only when optimizing for size.
def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
      Requires<[HasAVX512, OptForSize]>;

// When optimizing for speed, load separately (vmovss) then convert
// register-to-register.
def : Pat<(f64 (extloadf32 addr:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
          Requires<[HasAVX512, OptForSpeed]>;

def : Pat<(f32 (fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
           Requires<[HasAVX512]>;

// Fold Movss/Movsd-of-converted-element into the intrinsic forms.
def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
                     (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;

def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
                     (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;
7641
7642//===----------------------------------------------------------------------===//
7643// AVX-512  Vector convert from signed/unsigned integer to float/double
7644//          and from float/double to signed/unsigned integer
7645//===----------------------------------------------------------------------===//
7646
// Generic vector conversion: register (rr), full-vector memory (rm) and
// broadcast-memory (rmb) maskable forms.
//   Broadcast - broadcast suffix string; overridden by instantiations that
//               read fewer source elements than the source VT holds.
//   Alias     - extra mnemonic suffix for the rm form (e.g. "{x}"/"{y}" to
//               disambiguate 128/256-bit memory forms with equal dest types).
//   MemOp     - memory operand for rm; overridden when only part of the
//               source vector is loaded (e.g. f64mem for v4f32 -> v2f64).
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86VectorVTInfo _Src, SDNode OpNode,
                          X86FoldableSchedWrite sched,
                          string Broadcast = _.BroadcastStr,
                          string Alias = "", X86MemOperand MemOp = _Src.MemOp> {

  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (OpNode (_Src.VT _Src.RC:$src)))>,
                         EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
                         (_.VT (OpNode (_Src.VT
                             (bitconvert (_Src.LdFrag addr:$src)))))>,
                         EVEX, Sched<[sched.Folded]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _Src.ScalarMemOp:$src), OpcodeStr,
                         "${src}"##Broadcast, "${src}"##Broadcast,
                         (_.VT (OpNode (_Src.VT
                                  (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
                            ))>, EVEX, EVEX_B,
                         Sched<[sched.Folded]>;
}
// Conversion with SAE - suppress all exceptions (EVEX.b {sae} vector form).
multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                              X86VectorVTInfo _Src, SDNode OpNodeRnd,
                              X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
                               (i32 FROUND_NO_EXC)))>,
                        EVEX, EVEX_B, Sched<[sched]>;
}
7683
// Conversion with rounding control (RC): EVEX.b vector form taking an
// explicit static-rounding-mode immediate ($rc).
multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86VectorVTInfo _Src, SDNode OpNodeRnd,
                         X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
                        "$rc, $src", "$src, $rc",
                        (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
                        EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
7694
// Extend Float to Double (vcvtps2pd): 512-bit form with SAE, plus VLX
// 128/256-bit forms. The 128-bit form reads only 2 of 4 source elements,
// so it overrides the broadcast string to "{1to2}" and loads via f64mem.
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
                            fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                X86vfpextRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                               X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
                               sched.YMM>, EVEX_V256;
  }
}
7711
// Truncate Double to Float (vcvtpd2ps): 512-bit form with RC, plus VLX
// 128/256-bit forms. Both VLX forms produce v4f32, so "{x}"/"{y}" mnemonic
// suffixes (and explicit broadcast strings) disambiguate their memory
// forms; InstAliases accept the suffixed spellings for register forms and
// Intel-syntax memory forms.
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
                               X86vfproundRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
                               X86vfpround, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;

    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
  }
}
7735
// Instantiate the packed float<->double conversions.
defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
                                  VEX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
                                  PS, EVEX_CD8<32, CD8VH>;

// Fold a v8f32 extending load into the 512-bit vcvtps2pd.
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
7743
let Predicates = [HasVLX] in {
  // vcvtpd2ps (128-bit) already zeroes the upper half of the result, so
  // the surrounding vzmovl is redundant and can be dropped.
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                               (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
            (VCVTPD2PSZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                               (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
            (VCVTPD2PSZ128rm addr:$src)>;
  // Fold extending loads into the 128/256-bit vcvtps2pd memory forms.
  def : Pat<(v2f64 (extloadv2f32 addr:$src)),
              (VCVTPS2PDZ128rm addr:$src)>;
  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
              (VCVTPS2PDZ256rm addr:$src)>;
}
7756
// Convert Signed/Unsigned Doubleword to Double. No rounding variant: the
// i32 -> f64 conversion is exact. The 128-bit form uses OpNode128 and an
// i64mem load since only 2 of the 4 source elements are consumed.
multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, X86SchedWriteWidths sched> {
  // No rounding in this op
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
                            sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
                               OpNode128, sched.XMM, "{1to2}", "", i64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7772
// Convert Signed/Unsigned Doubleword to Float: 512-bit form with rounding
// control, plus plain VLX 128/256-bit forms.
multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7789
// Convert Float to Signed/Unsigned Doubleword with truncation: 512-bit
// form with SAE (truncation fixes the rounding mode, so only exception
// suppression applies), plus plain VLX 128/256-bit forms.
multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7806
// Convert Float to Signed/Unsigned Doubleword (non-truncating): 512-bit
// form with rounding control, plus plain VLX 128/256-bit forms.
multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7823
// Convert Double to Signed/Unsigned Doubleword with truncation: 512-bit
// form with SAE, plus VLX 128/256-bit forms with "x"/"y" disambiguation
// (see comment below).
multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
                               OpNode, sched.XMM, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;

    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
  }
}
7853
// Convert Double to Signed/Unsigned Doubleword (non-truncating): 512-bit
// form with rounding control, plus VLX 128/256-bit forms with "x"/"y"
// disambiguation (see comment below).
multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasAVX512] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasVLX] in {
    // we need "x"/"y" suffixes in order to distinguish between 128 and 256
    // memory forms of these instructions in Asm Parser. They have the same
    // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
    // due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
                               sched.XMM, "{1to2}", "{x}">, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256;

    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
  }
}
7883
// Convert Double to Signed/Unsigned Quadword (AVX512DQ): 512-bit form
// with rounding control, plus DQI+VLX 128/256-bit forms.
multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7900
// Convert Double to Signed/Unsigned Quadword with truncation (AVX512DQ):
// 512-bit form with SAE, plus DQI+VLX 128/256-bit forms.
multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
                               sched.XMM>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7917
// Convert Signed/Unsigned Quadword to Double (AVX512DQ): 512-bit form with
// rounding control, plus DQI+VLX 128/256-bit forms. The VLX forms are
// marked NotEVEX2VEXConvertible: they have no VEX equivalent.
multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
                               sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
                               sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
  }
}
7934
// Convert Float to Signed/Unsigned Quadword.
// Requires AVX512DQ; the 128/256-bit forms additionally require VLX.
// The 512-bit form gets explicit rounding-control variants.
multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source (the memory form reads 64 bits, hence f64mem).
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7953
// Convert Float to Signed/Unsigned Quadword with truncation.
// Requires AVX512DQ; the 128/256-bit forms additionally require VLX.
// The 512-bit form gets a {sae} variant (truncating conversions use SAE,
// not rounding control).
multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            SDNode OpNodeRnd, X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
                                OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // Explicitly specified broadcast string, since we take only 2 elements
    // from v4f32x_info source (the memory form reads 64 bits, hence f64mem).
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
                               sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
                               sched.YMM>, EVEX_V256;
  }
}
7971
// Convert Signed/Unsigned Quadword to Float.
// Requires AVX512DQ; the 128/256-bit forms additionally require VLX.
// OpNode128 is a distinct SDNode for the 128-bit form, which produces a
// v4f32 result from only two source elements.
multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           SDNode OpNode128, SDNode OpNodeRnd,
                           X86SchedWriteWidths sched> {
  let Predicates = [HasDQI] in {
    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
                            sched.ZMM>,
             avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
                               OpNodeRnd, sched.ZMM>, EVEX_V512;
  }
  let Predicates = [HasDQI, HasVLX] in {
    // We need "x"/"y" suffixes in order to distinguish between 128 and 256
    // bit memory forms of these instructions in the Asm Parser. They have the
    // same dest type - 'v4f32x_info'. We also specify the broadcast string
    // explicitly due to the same reason.
    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
                               sched.XMM, "{1to2}", "{x}">, EVEX_V128,
                               NotEVEX2VEXConvertible;
    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
                               sched.YMM, "{1to4}", "{y}">, EVEX_V256,
                               NotEVEX2VEXConvertible;

    // Aliases so the suffixed mnemonics are accepted for the register forms
    // (AT&T) and the memory forms (Intel syntax).
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
    def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
    def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
                    (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
  }
}
8004
// Instantiations of the vector int<->fp conversion multiclasses above.

// Signed/unsigned doubleword to double/float.
defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
                                 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;

defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
                                X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
                                PS, EVEX_CD8<32, CD8VF>;

// Float/double to signed/unsigned doubleword with truncation.
defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
                                X86cvttp2siRnd, SchedWriteCvtPS2DQ>,
                                XS, EVEX_CD8<32, CD8VF>;

defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
                                 X86cvttp2siRnd, SchedWriteCvtPD2DQ>,
                                 PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PS,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>,
                                 PS, VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
                                  X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
                                  EVEX_CD8<32, CD8VH>;

defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
                                 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
                                 EVEX_CD8<32, CD8VF>;

// Float/double to signed/unsigned doubleword (rounding per MXCSR/embedded RC).
defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VF>;

defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
                                 VEX_W, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
                                 PS, EVEX_CD8<32, CD8VF>;

defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PS, EVEX_CD8<64, CD8VF>;

// Float/double to signed/unsigned quadword (AVX512DQ).
defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
                                 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
                                 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

// Float/double to signed/unsigned quadword with truncation (AVX512DQ).
defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
                                 X86cvttp2siRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
                                 X86cvttp2siRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, VEX_W,
                                 PD, EVEX_CD8<64, CD8VF>;

defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
                                 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PD,
                                 EVEX_CD8<32, CD8VH>;

// Signed/unsigned quadword to double/float (AVX512DQ).
defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
                            X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
                            X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
                            X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
                            EVEX_CD8<64, CD8VF>;

defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
                            X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
                            EVEX_CD8<64, CD8VF>;
8099
// Select generic truncating fp-to-int nodes (fp_to_sint/fp_to_uint) to the
// vcvtt*dq/udq instructions defined above. 512-bit forms need only AVX512F.
let Predicates = [HasAVX512] in  {
  def : Pat<(v16i32 (fp_to_sint (v16f32 VR512:$src))),
            (VCVTTPS2DQZrr VR512:$src)>;
  def : Pat<(v16i32 (fp_to_sint (loadv16f32 addr:$src))),
            (VCVTTPS2DQZrm addr:$src)>;

  def : Pat<(v16i32 (fp_to_uint (v16f32 VR512:$src))),
            (VCVTTPS2UDQZrr VR512:$src)>;
  def : Pat<(v16i32 (fp_to_uint (loadv16f32 addr:$src))),
            (VCVTTPS2UDQZrm addr:$src)>;

  def : Pat<(v8i32 (fp_to_sint (v8f64 VR512:$src))),
            (VCVTTPD2DQZrr VR512:$src)>;
  def : Pat<(v8i32 (fp_to_sint (loadv8f64 addr:$src))),
            (VCVTTPD2DQZrm addr:$src)>;

  def : Pat<(v8i32 (fp_to_uint (v8f64 VR512:$src))),
            (VCVTTPD2UDQZrr VR512:$src)>;
  def : Pat<(v8i32 (fp_to_uint (loadv8f64 addr:$src))),
            (VCVTTPD2UDQZrm addr:$src)>;
}

// 128/256-bit forms of the same patterns require VLX.
let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128X:$src))),
            (VCVTTPS2DQZ128rr VR128X:$src)>;
  def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
            (VCVTTPS2DQZ128rm addr:$src)>;

  def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src))),
            (VCVTTPS2UDQZ128rr VR128X:$src)>;
  def : Pat<(v4i32 (fp_to_uint (loadv4f32 addr:$src))),
            (VCVTTPS2UDQZ128rm addr:$src)>;

  def : Pat<(v8i32 (fp_to_sint (v8f32 VR256X:$src))),
            (VCVTTPS2DQZ256rr VR256X:$src)>;
  def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
            (VCVTTPS2DQZ256rm addr:$src)>;

  def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src))),
            (VCVTTPS2UDQZ256rr VR256X:$src)>;
  def : Pat<(v8i32 (fp_to_uint (loadv8f32 addr:$src))),
            (VCVTTPS2UDQZ256rm addr:$src)>;

  def : Pat<(v4i32 (fp_to_sint (v4f64 VR256X:$src))),
            (VCVTTPD2DQZ256rr VR256X:$src)>;
  def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
            (VCVTTPD2DQZ256rm addr:$src)>;

  def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src))),
            (VCVTTPD2UDQZ256rr VR256X:$src)>;
  def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))),
            (VCVTTPD2UDQZ256rm addr:$src)>;
}
8153
// Truncating fp-to-quadword patterns; the qq/uqq instructions need AVX512DQ.
let Predicates = [HasDQI] in {
  def : Pat<(v8i64 (fp_to_sint (v8f32 VR256X:$src))),
            (VCVTTPS2QQZrr VR256X:$src)>;
  def : Pat<(v8i64 (fp_to_sint (loadv8f32 addr:$src))),
            (VCVTTPS2QQZrm addr:$src)>;

  def : Pat<(v8i64 (fp_to_uint (v8f32 VR256X:$src))),
            (VCVTTPS2UQQZrr VR256X:$src)>;
  def : Pat<(v8i64 (fp_to_uint (loadv8f32 addr:$src))),
            (VCVTTPS2UQQZrm addr:$src)>;

  def : Pat<(v8i64 (fp_to_sint (v8f64 VR512:$src))),
            (VCVTTPD2QQZrr VR512:$src)>;
  def : Pat<(v8i64 (fp_to_sint (loadv8f64 addr:$src))),
            (VCVTTPD2QQZrm addr:$src)>;

  def : Pat<(v8i64 (fp_to_uint (v8f64 VR512:$src))),
            (VCVTTPD2UQQZrr VR512:$src)>;
  def : Pat<(v8i64 (fp_to_uint (loadv8f64 addr:$src))),
            (VCVTTPD2UQQZrm addr:$src)>;
}

// 128/256-bit quadword forms additionally require VLX.
let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))),
            (VCVTTPS2QQZ256rr VR128X:$src)>;
  def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))),
            (VCVTTPS2QQZ256rm addr:$src)>;

  def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src))),
            (VCVTTPS2UQQZ256rr VR128X:$src)>;
  def : Pat<(v4i64 (fp_to_uint (loadv4f32 addr:$src))),
            (VCVTTPS2UQQZ256rm addr:$src)>;

  def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src))),
            (VCVTTPD2QQZ128rr VR128X:$src)>;
  def : Pat<(v2i64 (fp_to_sint (loadv2f64 addr:$src))),
            (VCVTTPD2QQZ128rm addr:$src)>;

  def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src))),
            (VCVTTPD2UQQZ128rr VR128X:$src)>;
  def : Pat<(v2i64 (fp_to_uint (loadv2f64 addr:$src))),
            (VCVTTPD2UQQZ128rm addr:$src)>;

  def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src))),
            (VCVTTPD2QQZ256rr VR256X:$src)>;
  def : Pat<(v4i64 (fp_to_sint (loadv4f64 addr:$src))),
            (VCVTTPD2QQZ256rm addr:$src)>;

  def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src))),
            (VCVTTPD2UQQZ256rr VR256X:$src)>;
  def : Pat<(v4i64 (fp_to_uint (loadv4f64 addr:$src))),
            (VCVTTPD2UQQZ256rm addr:$src)>;
}
8207
// Without VLX only the 512-bit instructions exist, so 128/256-bit unsigned
// conversions are implemented by widening the operand into a zmm register
// (INSERT_SUBREG into IMPLICIT_DEF), running the Z-form instruction, and
// extracting the low subregister of the result.
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
           (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
           (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                 VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
           (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
           (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
           (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                 VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
           (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                 VR128X:$src1, sub_xmm)))), sub_xmm)>;
}
8244
// Fold an X86vzmovl (zero upper elements) around a pd->dq conversion into
// the 128-bit conversion instruction itself, and match v2f64 conversions
// fed by a scalar 64-bit load / zero-extending load.
let Predicates = [HasAVX512, HasVLX] in {
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                              (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
            (VCVTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                              (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
            (VCVTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
            (VCVTPD2UDQZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                              (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
            (VCVTTPD2DQZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                              (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
            (VCVTTPD2DQZ128rm addr:$src)>;
  def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
            (VCVTTPD2UDQZ128rr VR128X:$src)>;

  // dq->pd conversions that only need the low 64 bits of the source can use
  // the memory form directly on a scalar i64 load.
  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTDQ2PDZ128rm addr:$src)>;

  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
  def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
            (VCVTUDQ2PDZ128rm addr:$src)>;
}
8275
// Fold loads into 512-bit fp round/extend conversions.
let Predicates = [HasAVX512] in {
  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;
  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
            (VCVTPS2PDZrm addr:$src)>;
}

// Fold X86vzmovl (zero upper elements) around 128-bit qq->ps conversions
// into the conversion instruction itself (AVX512DQ + VLX).
let Predicates = [HasDQI, HasVLX] in {
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                              (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
            (VCVTQQ2PSZ128rr VR128X:$src)>;
  def : Pat<(X86vzmovl (v2f64 (bitconvert
                              (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
            (VCVTUQQ2PSZ128rr VR128X:$src)>;
}
8291
// With AVX512DQ but no VLX, 128/256-bit quadword conversions are done by
// widening the operand to 512 bits, executing the Z-form instruction, and
// extracting the low subregister of the result.
let Predicates = [HasDQI, NoVLX] in {
def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
           (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
           (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_ymm)>;

def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
           (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;

def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_xmm)>;

def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR128X:$src1, sub_xmm)))), sub_xmm)>;

def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
          (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
           (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                  VR256X:$src1, sub_ymm)))), sub_ymm)>;
}
8353
8354//===----------------------------------------------------------------------===//
8355// Half precision conversion instructions
8356//===----------------------------------------------------------------------===//
8357
// Half-precision to single-precision conversion (vcvtph2ps): masked
// register and memory forms. The memory operand is loaded with ld_frag and
// bitcast to the source vector type.
multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, PatFrag ld_frag,
                           X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
                            (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT _src.RC:$src))>,
                            T8PD, Sched<[sched]>;
  defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
                            (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
                            (X86cvtph2ps (_src.VT
                                          (bitconvert
                                           (ld_frag addr:$src))))>,
                            T8PD, Sched<[sched.Folded]>;
}
8372
// {sae} (suppress-all-exceptions) register form of vcvtph2ps, encoded with
// EVEX.b set.
multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               X86FoldableSchedWrite sched> {
  defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
                             (ins _src.RC:$src), "vcvtph2ps",
                             "{sae}, $src", "$src, {sae}",
                             (X86cvtph2psRnd (_src.VT _src.RC:$src),
                                             (i32 FROUND_NO_EXC))>,
                             T8PD, EVEX_B, Sched<[sched]>;
}
8382
// vcvtph2ps instantiations: the 512-bit form (with a {sae} variant) needs
// AVX512F; the 128/256-bit forms need VLX.
let Predicates = [HasAVX512] in
  defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
                                    WriteCvtPH2PSZ>,
                    avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
                    EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;

let Predicates = [HasVLX] in {
  defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
                       loadv2i64, WriteCvtPH2PSY>, EVEX, EVEX_V256,
                       EVEX_CD8<32, CD8VH>;
  defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
                       loadv2i64, WriteCvtPH2PS>, EVEX, EVEX_V128,
                       EVEX_CD8<32, CD8VH>;

  // Pattern match vcvtph2ps of a scalar i64 load (the 128-bit form only
  // reads 64 bits of source).
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
            (VCVTPH2PSZ128rm addr:$src)>;
  def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
              (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
            (VCVTPH2PSZ128rm addr:$src)>;
}
8406
// Single-precision to half-precision conversion (vcvtps2ph). $src2 is the
// immediate rounding-control byte. The store forms (mr/mrk) carry no
// patterns (hasSideEffects = 0, mayStore = 1); store selection is handled
// by the explicit Pat<> definitions that follow the instantiations.
multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                           X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
  defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
                   (ins _src.RC:$src1, i32u8imm:$src2),
                   "vcvtps2ph", "$src2, $src1", "$src1, $src2",
                   (X86cvtps2ph (_src.VT _src.RC:$src1),
                                (i32 imm:$src2)), 0, 0>,
                   AVX512AIi8Base, Sched<[RR]>;
  let hasSideEffects = 0, mayStore = 1 in {
    def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               Sched<[MR]>;
    // Masked (write-mask) store form.
    def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
                EVEX_K, Sched<[MR]>, NotMemoryFoldable;
  }
}
8426
// {sae} register form of vcvtps2ph, asm-only (no pattern; empty dag list).
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
                               SchedWrite Sched> {
  let hasSideEffects = 0 in
  defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
                   (outs _dest.RC:$dst),
                   (ins _src.RC:$src1, i32u8imm:$src2),
                   "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
                   EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
}
8436
// vcvtps2ph instantiations plus the store patterns for the pattern-less
// mr forms defined in avx512_cvtps2ph. The 128-bit result occupies only
// 64 bits, so its store is matched via an extractelt of the low element.
let Predicates = [HasAVX512] in {
  defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
                                    WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
                    avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
                                        EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
  let Predicates = [HasVLX] in {
    defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
                                         WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
                                         EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
    defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
                                         WriteCvtPS2PH, WriteCvtPS2PHSt>,
                                         EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
  }

  def : Pat<(store (f64 (extractelt
                         (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (i64 (extractelt
                         (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
                         (iPTR 0))), addr:$dst),
            (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
  def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
  def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
            (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
}
8464
// Patterns for matching conversions from float to half-float and vice versa.
// Scalar values are moved into xmm registers (COPY_TO_REGCLASS) and the
// 128-bit conversion instructions are used.
let Predicates = [HasVLX] in {
  // Use MXCSR.RC for rounding instead of explicitly specifying the default
  // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
  // configurations we support (the default). However, falling back to MXCSR is
  // more consistent with other instructions, which are always controlled by it.
  // It's encoded as 0b100.
  def : Pat<(fp_to_f16 FR32X:$src),
            (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
              (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;

  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
              (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;

  // Round-trip f32 -> f16 -> f32 selected as convert-down then convert-up.
  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
            (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
              (v8i16 (VCVTPS2PHZ128rr
               (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
}
8485
//  Unordered/Ordered scalar fp compare with SAE and set EFLAGS
// (asm-only: hasSideEffects = 0 and no pattern).
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
                            string OpcodeStr, X86FoldableSchedWrite sched> {
  let hasSideEffects = 0 in
  def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
                  !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
                  EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
}
8494
// EVEX-encoded (u)comiss/(u)comisd: {sae} forms first, then the regular
// scalar compare forms reusing the SSE multiclasses.
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
  defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
                                   AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
  defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
                                   AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}

let Defs = [EFLAGS], Predicates = [HasAVX512] in {
  defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
                                 "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                 EVEX_CD8<32, CD8VT1>;
  defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
                                  "ucomisd", WriteFCom>, PD, EVEX,
                                  VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  // comiss/comisd get no selection patterns (Pattern = []); they are
  // assembler/disassembler definitions only here.
  let Pattern = []<dag> in {
    defm VCOMISSZ  : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
                                   "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                                   EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ  : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
                                   "comisd", WriteFCom>, PD, EVEX,
                                    VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
  // Intrinsic (vector-operand) forms, codegen-only.
  let isCodeGenOnly = 1 in {
    defm VUCOMISSZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
                          sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
                          EVEX_CD8<32, CD8VT1>;
    defm VUCOMISDZ  : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
                          sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;

    defm VCOMISSZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
                          sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
                          EVEX_CD8<32, CD8VT1>;
    defm VCOMISDZ  : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
                          sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
                          VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
  }
}
8537
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
/// Scalar 14-bit-precision reciprocal / reciprocal-sqrt approximations:
/// masked register-register and register-memory (intrinsic memory operand)
/// forms.
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
                           EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1),
                          _.ScalarIntMemCPat:$src2)>, EVEX_4V,
                          Sched<[sched.Folded, ReadAfterLd]>;
}
}
8555
// Scalar VRCP14 / VRSQRT14 instantiations. SS forms use 32-bit CD8 scaling,
// SD forms add VEX_W and 64-bit CD8 scaling; all are T8PD-encoded.
defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
                               f32x_info>, EVEX_CD8<32, CD8VT1>,
                               T8PD;
defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
                               f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
                               T8PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f32x_info>,
                                 EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
                                 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
                                 EVEX_CD8<64, CD8VT1>, T8PD;
8568
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
/// Packed unary approximation at one vector width. Instantiates three
/// write-maskable forms: reg (r), full-vector load (m), and broadcast
/// load (mb, EVEX.b set).
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  // Register source.
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
                         Sched<[sched]>;
  // Full-width memory source.
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT
                           (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
                         Sched<[sched.Folded, ReadAfterLd]>;
  // Broadcast memory source: a single scalar element splatted to all lanes.
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.ScalarMemOp:$src), OpcodeStr,
                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                          (OpNode (_.VT
                            (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
                          EVEX, T8PD, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
8590
// Instantiates the packed fp14 approximation at every vector width: the
// 512-bit forms unconditionally, the 128/256-bit forms only when AVX512VL
// is available.
//
// Note: uses the TableGen `#` paste operator for mnemonic suffixes, matching
// the style of avx512_eri_s/avx512_eri in this file (`#` and !strconcat are
// equivalent on strings).
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched> {
  defm PSZ : avx512_fp14_p<opc, OpcodeStr#"ps", OpNode, sched.ZMM,
                           v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_fp14_p<opc, OpcodeStr#"pd", OpNode, sched.ZMM,
                           v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;

  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp14_p<opc, OpcodeStr#"ps",
                                OpNode, sched.XMM, v4f32x_info>,
                               EVEX_V128, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp14_p<opc, OpcodeStr#"ps",
                                OpNode, sched.YMM, v8f32x_info>,
                               EVEX_V256, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp14_p<opc, OpcodeStr#"pd",
                                OpNode, sched.XMM, v2f64x_info>,
                               EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp14_p<opc, OpcodeStr#"pd",
                                OpNode, sched.YMM, v4f64x_info>,
                               EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
  }
}
8614
// Packed VRSQRT14 / VRCP14 at all widths (Z unconditional, Z128/Z256 under VLX).
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
8617
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
/// Scalar "28-bit" (ERI) approximations. Three forms: plain reg-reg (r,
/// current rounding mode), reg-reg with {sae} / suppress-all-exceptions
/// (rb, EVEX.b set), and reg-mem (m). The OpNode carries the rounding-mode
/// immediate as an extra i32 operand.
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in {
  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                           "$src2, $src1", "$src1, $src2",
                           (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           (i32 FROUND_CURRENT))>,
                           Sched<[sched]>;

  // {sae} variant: EVEX.b selects exception suppression for reg-reg forms.
  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                            (i32 FROUND_NO_EXC))>, EVEX_B,
                            Sched<[sched]>;

  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                         (i32 FROUND_CURRENT))>,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }
}
8644
// Instantiates the f32 (SS) and f64 (SD) scalar variants of an ERI-style
// scalar op; SD adds VEX_W and 64-bit CD8 scaling.
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        X86FoldableSchedWrite sched> {
  defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, sched>,
               EVEX_CD8<32, CD8VT1>;
  defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, sched>,
               EVEX_CD8<64, CD8VT1>, VEX_W;
}
8652
// Scalar VRCP28/VRSQRT28 require the ERI feature (Xeon Phi).
let Predicates = [HasERI] in {
  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SchedWriteFRcp.Scl>,
                              T8PD, EVEX_4V;
  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s,
                               SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
}
8659
// Scalar VGETEXP reuses the ERI scalar shell but is baseline AVX-512
// (no HasERI predicate wrapped around it).
defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
                              SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
/// Packed ERI-style unary op (current rounding mode). Forms: reg (r),
/// full-vector load (m), broadcast load (mb, EVEX.b). The {sae} variant
/// lives in avx512_fp28_p_round below.

multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in {
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>,
                         Sched<[sched]>;

  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (OpNode (_.VT
                             (bitconvert (_.LdFrag addr:$src))),
                          (i32 FROUND_CURRENT))>,
                          Sched<[sched.Folded, ReadAfterLd]>;

  // Broadcast form: scalar element splatted across the vector.
  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.ScalarMemOp:$src), OpcodeStr,
                         "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                         (OpNode (_.VT
                                  (X86VBroadcast (_.ScalarLdFrag addr:$src))),
                                 (i32 FROUND_CURRENT))>, EVEX_B,
                         Sched<[sched.Folded, ReadAfterLd]>;
  }
}
// {sae} (suppress-all-exceptions) reg-reg variant of the packed fp28 op;
// EVEX.b on a register form selects SAE rather than broadcast.
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         SDNode OpNode, X86FoldableSchedWrite sched> {
  let ExeDomain = _.ExeDomain in
  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src), OpcodeStr,
                        "{sae}, $src", "$src, {sae}",
                        (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>,
                        EVEX_B, Sched<[sched]>;
}
8697
// Packed ERI shell: 512-bit PS/PD forms only, each with both the
// current-rounding and the {sae} variants.
multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                       X86SchedWriteWidths sched> {
   defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
              avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
              T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
   defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
              avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
              T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
8707
// 128/256-bit (VLX-only) packed forms of an fp28-style unary op; no {sae}
// variants at these widths. Used below to extend VGETEXP to XMM/YMM.
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
                                  SDNode OpNode, X86SchedWriteWidths sched> {
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched.XMM>,
                                     EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched.YMM>,
                                     EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched.XMM>,
                                     EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched.YMM>,
                                     EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
  }
}
8722
// Packed ERI instructions (512-bit only). VGETEXP additionally gets
// 128/256-bit forms via avx512_fp_unaryop_packed and needs no ERI predicate.
let Predicates = [HasERI] in {
 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SchedWriteFRsqrt>, EVEX;
 defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX;
 defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX;
}
defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>,
                 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
                                          SchedWriteFRnd>, EVEX;
8731
// Packed VSQRT with an explicit static rounding-mode operand ($rc);
// EVEX.b + EVEX_RC select embedded rounding on the reg-reg form.
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in
  defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
                         (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc)))>,
                         EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
}
8740
// Packed VSQRT at one width, matching the generic fsqrt node. Forms: reg
// (r), full-vector load (m), and broadcast load (mb, EVEX.b).
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
                         (_.VT (fsqrt _.RC:$src))>, EVEX,
                         Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
                         (fsqrt (_.VT
                           (bitconvert (_.LdFrag addr:$src))))>, EVEX,
                           Sched<[sched.Folded, ReadAfterLd]>;
  // Broadcast form: one scalar element splatted to every lane.
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.ScalarMemOp:$src), OpcodeStr,
                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
                          (fsqrt (_.VT
                            (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
                          EVEX, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  }
}
8761
// Instantiates packed VSQRT at every width: ZMM unconditionally, XMM/YMM
// guarded by AVX512VL. PS and PD variants pick up their own prefix byte
// and per-size scheduling class.
//
// Note: uses the TableGen `#` paste operator for the "ps"/"pd" suffixes,
// matching avx512_eri / avx512_sqrt_scalar_all in this file (`#` is
// equivalent to !strconcat on strings).
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed<opc, OpcodeStr#"ps",
                                sched.PS.ZMM, v16f32_info>,
                                EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed<opc, OpcodeStr#"pd",
                                sched.PD.ZMM, v8f64_info>,
                                EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  // Define only if AVX512VL feature is present.
  let Predicates = [HasVLX] in {
    defm PSZ128 : avx512_sqrt_packed<opc, OpcodeStr#"ps",
                                     sched.PS.XMM, v4f32x_info>,
                                     EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
    defm PSZ256 : avx512_sqrt_packed<opc, OpcodeStr#"ps",
                                     sched.PS.YMM, v8f32x_info>,
                                     EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
    defm PDZ128 : avx512_sqrt_packed<opc, OpcodeStr#"pd",
                                     sched.PD.XMM, v2f64x_info>,
                                     EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
    defm PDZ256 : avx512_sqrt_packed<opc, OpcodeStr#"pd",
                                     sched.PD.YMM, v4f64x_info>,
                                     EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
  }
}
8786
// Embedded-rounding ($rc) packed VSQRT forms; these exist only at 512 bits.
//
// Note: uses the TableGen `#` paste operator for the "ps"/"pd" suffixes,
// matching avx512_eri / avx512_sqrt_scalar_all in this file (`#` is
// equivalent to !strconcat on strings).
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
                                        X86SchedWriteSizes sched> {
  defm PSZ : avx512_sqrt_packed_round<opc, OpcodeStr#"ps",
                                      sched.PS.ZMM, v16f32_info>,
                                      EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
  defm PDZ : avx512_sqrt_packed_round<opc, OpcodeStr#"pd",
                                      sched.PD.ZMM, v8f64_info>,
                                      EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
8796
// Scalar VSQRT. Defines the intrinsic forms (r_Int/m_Int, plus rb_Int with
// an explicit rounding-mode operand), pattern-less codegen-only FRC forms
// (r/m) for selecting plain scalar fsqrt, and the selection patterns that
// map fsqrt onto those FRC forms. `Name` is the instruction-name prefix used
// by the !cast lookups in the patterns.
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> {
  let ExeDomain = _.ExeDomain in {
    defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                    (_.VT _.RC:$src2),
                                    (i32 FROUND_CURRENT))>,
                         Sched<[sched]>;
    defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
                         "$src2, $src1", "$src1, $src2",
                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                    _.ScalarIntMemCPat:$src2,
                                    (i32 FROUND_CURRENT))>,
                         Sched<[sched.Folded, ReadAfterLd]>;
    // Embedded-rounding variant: $rc picks the static rounding mode.
    defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
                         "$rc, $src2, $src1", "$src1, $src2, $rc",
                         (X86fsqrtRnds (_.VT _.RC:$src1),
                                     (_.VT _.RC:$src2),
                                     (i32 imm:$rc))>,
                         EVEX_B, EVEX_RC, Sched<[sched]>;

    // Pattern-less FRC-register forms used only by the fsqrt patterns below.
    let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
      def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
                (ins _.FRC:$src1, _.FRC:$src2),
                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                Sched<[sched]>;
      let mayLoad = 1 in
        def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                  (ins _.FRC:$src1, _.ScalarMemOp:$src2),
                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
                  Sched<[sched.Folded, ReadAfterLd]>;
    }
  }

  // Select scalar fsqrt; the unused first operand is tied off to IMPLICIT_DEF.
  let Predicates = [HasAVX512] in {
    def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
              (!cast<Instruction>(Name#Zr)
                  (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  }

  // Fold the load only when optimizing for size (keeps the reg-reg form
  // available for unfolding otherwise).
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(_.EltVT (fsqrt (load addr:$src))),
              (!cast<Instruction>(Name#Zm)
                  (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
}
8847
// SS (f32, XS prefix) and SD (f64, XD + VEX_W) scalar sqrt instantiations.
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
                                  X86SchedWriteSizes sched> {
  defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
                        EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
  defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
                        EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
}
8855
// VSQRT: packed (all widths + embedded rounding) and scalar instantiations.
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
             avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;

defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
8860
// Scalar VRNDSCALE. Intrinsic forms (r_Int / rb_Int with {sae} / m_Int),
// pattern-less codegen-only FRC forms (r/m), and patterns selecting the
// generic rounding nodes (ffloor/fceil/ftrunc/frint/fnearbyint) via the
// appropriate immediate. Per the VRNDSCALE immediate encoding, imm bits[1:0]
// choose the rounding mode (0=nearest, 1=down, 2=up, 3=truncate), bit 2
// selects MXCSR rounding, and bit 3 suppresses the precision exception.
multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                           "$src3, $src2, $src1", "$src1, $src2, $src3",
                           (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                           (i32 imm:$src3)))>,
                           Sched<[sched]>;

  // {sae} variant: EVEX.b suppresses all exceptions.
  defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
                         "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
                         (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                         (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B,
                         Sched<[sched]>;

  defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                         (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                         OpcodeStr,
                         "$src3, $src2, $src1", "$src1, $src2, $src3",
                         (_.VT (X86RndScales _.RC:$src1,
                                _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
                         Sched<[sched.Folded, ReadAfterLd]>;

  // Pattern-less FRC forms used by the ffloor/fceil/... patterns below.
  let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
    def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
               []>, Sched<[sched]>;

    let mayLoad = 1 in
      def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
                 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                 []>, Sched<[sched.Folded, ReadAfterLd]>;
  }
  }

  // Register-source rounding patterns; immediates per the encoding above
  // (0x9 = down/no-PE, 0xa = up/no-PE, 0xb = trunc/no-PE, 0x4 = MXCSR,
  //  0xc = MXCSR/no-PE).
  let Predicates = [HasAVX512] in {
    def : Pat<(ffloor _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src, (i32 0x9)))>;
    def : Pat<(fceil _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src, (i32 0xa)))>;
    def : Pat<(ftrunc _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src, (i32 0xb)))>;
    def : Pat<(frint _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src, (i32 0x4)))>;
    def : Pat<(fnearbyint _.FRC:$src),
              (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
               _.FRC:$src, (i32 0xc)))>;
  }

  // Load-folding variants, only when optimizing for size.
  let Predicates = [HasAVX512, OptForSize] in {
    def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src, (i32 0x9)))>;
    def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src, (i32 0xa)))>;
    def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src, (i32 0xb)))>;
    def : Pat<(frint (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src, (i32 0x4)))>;
    def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
              (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
               addr:$src, (i32 0xc)))>;
  }
}
8936
// Scalar VRNDSCALE instantiations (SS = f32, SD = f64 with VEX_W).
defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
                                           SchedWriteFRnd.Scl, f32x_info>,
                                           AVX512AIi8Base, EVEX_4V,
                                           EVEX_CD8<32, CD8VT1>;

defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
                                           SchedWriteFRnd.Scl, f64x_info>,
                                           VEX_W, AVX512AIi8Base, EVEX_4V,
                                           EVEX_CD8<64, CD8VT1>;
8946
// Patterns that fold a select-on-mask around a scalar unary op into the
// masked (k) / zero-masked (kz) intrinsic instruction forms. `Mask` is the
// pattern-side mask dag, `OutMask` the output-side dag that materializes it
// in VK1WM for the selected instruction.
multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
                                dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
                                dag OutMask, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    // Merge-masking: the false arm reads back element 0 of $dst.
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intk)
               _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;

    // Zero-masking: the false arm is the floating-point zero PatLeaf.
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
               OutMask, _.VT:$src2, _.VT:$src1)>;
  }
}
8964
// Masked scalar sqrt patterns: the mask arrives as a GR32, is truncated to
// i8 and moved into a v1i1, then copied to VK1WM on the output side.
defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
                            fp32imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
                            (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
                            fp64imm0, (COPY_TO_REGCLASS  $mask, VK1WM), HasAVX512>;
8971
// Like avx512_masked_scalar, but for ops selected as an instruction with a
// fixed immediate (e.g. RNDSCALE): `ImmV` is baked into the output pattern.
// Note the different operand order vs avx512_masked_scalar ($src1 before
// $src2) matching the Zr_Int instruction forms.
multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move,
                                    X86VectorVTInfo _, PatLeaf ZeroFP,
                                    bits<8> ImmV, Predicate BasePredicate> {
  let Predicates = [BasePredicate] in {
    // Merge-masking form.
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))),
               (extractelt _.VT:$dst, (iPTR 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Intk)
               _.VT:$dst, VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;

    // Zero-masking form.
    def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
               (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Intkz)
               VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
  }
}
8988
// Masked floor/ceil via RNDSCALE with fixed immediates (0x01 = round down,
// 0x02 = round up).
defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
                                v4f32x_info, fp32imm0, 0x01, HasAVX512>;
defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
                                v4f32x_info, fp32imm0, 0x02, HasAVX512>;
defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
                                v2f64x_info, fp64imm0, 0x01, HasAVX512>;
defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
                                v2f64x_info, fp64imm0, 0x02,  HasAVX512>;
8997
8998
8999//-------------------------------------------------
9000// Integer truncate and extend operations
9001//-------------------------------------------------
9002
// One width of a VPMOV-style truncate: a maskable reg-reg form (rr) and
// pattern-less store forms (mr, and masked mrk) whose selection is supplied
// separately by avx512_trunc_mr_lowering.
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
                              X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
  let ExeDomain = DestInfo.ExeDomain in
  defm rr  : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
                      (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
                      (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
                      EVEX, T8XS, Sched<[sched]>;

  // Store forms carry no patterns here; see avx512_trunc_mr_lowering.
  let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
    def mr : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
               EVEX, Sched<[sched.Folded]>;

    // Masked store form; the mask comes from the source's write-mask class.
    def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
               (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
               OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
               EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
  }//mayStore = 1, hasSideEffects = 0
}
9024
// Selection patterns mapping (masked) truncating-store fragments onto the
// mr/mrk store forms defined in avx512_trunc_common. `Name` plus the
// source's ZSuffix reconstructs the instruction name for !cast.
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
                                    X86VectorVTInfo DestInfo,
                                    PatFrag truncFrag, PatFrag mtruncFrag,
                                    string Name> {

  def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
                                    addr:$dst, SrcInfo.RC:$src)>;

  def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
                                               (SrcInfo.VT SrcInfo.RC:$src)),
            (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
                            addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
9039
// Instantiates a truncate at all three widths. A separate SDNode per width
// (OpNode128/256/512) is taken because the narrow sources use different
// node forms (callers pass InVecNode for the widths where the result does
// not fill a full 128-bit vector). Z128/Z256 are gated on both VLX and the
// op's own predicate; Z on the op's predicate alone.
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
                        SDNode OpNode256, SDNode OpNode512, X86FoldableSchedWrite sched,
                        AVX512VLVectorVTInfo VTSrcInfo,
                        X86VectorVTInfo DestInfoZ128,
                        X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
                        X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
                        X86MemOperand x86memopZ, PatFrag truncFrag,
                        PatFrag mtruncFrag, Predicate prd = HasAVX512>{

  let Predicates = [HasVLX, prd] in {
    defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode128, sched,
                             VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
                avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
                             truncFrag, mtruncFrag, NAME>, EVEX_V128;

    defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode256, sched,
                             VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
                avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
                             truncFrag, mtruncFrag, NAME>, EVEX_V256;
  }
  let Predicates = [prd] in
    defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode512, sched,
                             VTSrcInfo.info512, DestInfoZ, x86memopZ>,
                avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
                             truncFrag, mtruncFrag, NAME>, EVEX_V512;
}
9066
// qword -> byte truncate (octo-fraction tuple, CD8VO): all widths produce
// a sub-128-bit result, so the InVec node is used at every width.
multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, sched,
                          avx512vl_i64_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}
9075
// qword -> word truncate (quarter tuple, CD8VQ): only the 512-bit source
// yields a full 128-bit result, so only Z uses the plain OpNode.
multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
                          avx512vl_i64_info, v8i16x_info, v8i16x_info,
                          v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}
9084
// qword -> dword truncate (half tuple, CD8VH): 256/512-bit sources produce
// at least 128 bits of result, so only Z128 needs the InVec node.
multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
                          avx512vl_i64_info, v4i32x_info, v4i32x_info,
                          v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}
9093
// dword -> byte truncate (quarter tuple, CD8VQ).
multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched,
                          avx512vl_i32_info, v16i8x_info, v16i8x_info,
                          v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
9102
// dword -> word truncate (half tuple, CD8VH).
multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched,
                          avx512vl_i32_info, v8i16x_info, v8i16x_info,
                          v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}
9111
// w->b truncation (VPMOVWB and its saturating variants): narrows packed i16
// elements to i8. Unlike the d/q variants this passes an explicit HasBWI
// predicate through to avx512_trunc, since the word element type requires
// AVX512BW. Destination VTs are v16i8/v16i8/v32i8 with i64/i128/i256 memory
// stores; disp8 compression: 16-bit elements, half-width vector (CD8VH).
multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, PatFrag StoreNode,
                           PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
                          sched, avx512vl_i16_info, v16i8x_info, v16i8x_info,
                          v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
                          MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
9120
// Instantiations of the truncating down-convert instructions. Each source/
// destination width pairing comes in three flavors: plain truncation
// (generic `trunc` node, with X86vtrunc overriding the in-vector node),
// signed saturation (X86vtruncs), and unsigned saturation (X86vtruncus),
// each with matching (masked) truncating-store fragments.

// q->b: i64 elements to i8.
defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   trunc, WriteShuffle256,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;

// q->w: i64 elements to i16.
defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw",   trunc, WriteShuffle256,
                                  truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16>;

// q->d: i64 elements to i32.
defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd",   trunc, WriteShuffle256,
                                  truncstorevi32, masked_truncstorevi32, X86vtrunc>;
defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi32, masked_truncstore_s_vi32>;
defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi32, masked_truncstore_us_vi32>;

// d->b: i32 elements to i8.
defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", trunc, WriteShuffle256,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb",   X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;

// d->w: i32 elements to i16.
defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", trunc, WriteShuffle256,
                                  truncstorevi16, masked_truncstorevi16, X86vtrunc>;
defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw",   X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi16, masked_truncstore_s_vi16>;
defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw",  X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi16, masked_truncstore_us_vi16>;

// w->b: i16 elements to i8 (AVX512BW).
defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", trunc, WriteShuffle256,
                                  truncstorevi8, masked_truncstorevi8, X86vtrunc>;
defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb",   X86vtruncs, WriteShuffle256,
                                  truncstore_s_vi8, masked_truncstore_s_vi8>;
defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb",  X86vtruncus, WriteShuffle256,
                                  truncstore_us_vi8, masked_truncstore_us_vi8>;
9162
// Without VLX the 128/256-bit VPMOV* forms are unavailable, so lower
// 256->128-bit truncations by widening the source into a 512-bit register
// (via INSERT_SUBREG of an IMPLICIT_DEF), using the 512-bit instruction,
// and extracting the low 128 bits of the result.
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
         (v8i16 (EXTRACT_SUBREG
                 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
                                          VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
         (v4i32 (EXTRACT_SUBREG
                 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
}
9173
// Same widening trick for the w->b case, which needs BWI for VPMOVWB.
let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
         (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
                                            VR256X:$src, sub_ymm))), sub_xmm))>;
}
9179
// Register and memory forms shared by all AVX-512 packed sign/zero
// extension instructions (VPMOVSX*/VPMOVZX*). The register form applies
// OpNode to the source vector; the memory form matches the extending-load
// fragment LdFrag directly. Both forms are maskable (AVX512_maskable adds
// the k/kz variants).
multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
              X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
              X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
                  EVEX, Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                  (ins x86memop:$src), OpcodeStr ,"$src", "$src",
                  (DestInfo.VT (LdFrag addr:$src))>,
                EVEX, Sched<[sched.Folded]>;
  }
}
9195
// b->w extension (VPMOV{S,Z}XBW). LdFrag defaults to the signed or unsigned
// i8 extending-load fragment selected by the ExtTy string ("s" or "z").
// The 128-bit form uses InVecNode while the wider forms use OpNode
// (presumably because only the 128-bit result leaves part of the source
// register unconsumed). Word results require BWI; 128/256-bit forms also
// require VLX.
multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasBWI] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
                    v16i8x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
                    v16i8x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasBWI] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
                    v32i8x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9214
// b->d extension (VPMOV{S,Z}XBD): i8 elements widened 4x to i32, so memory
// forms read only a quarter vector (i32/i64/i128 for the 128/256/512-bit
// results; CD8VQ disp8 scaling). 128-bit form uses InVecNode, wider forms
// use OpNode.
multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v16i8x_info, i32mem, LdFrag, InVecNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v16i8x_info, i64mem, LdFrag, OpNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i8x_info, i128mem, LdFrag, OpNode>,
                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9233
// b->q extension (VPMOV{S,Z}XBQ): i8 elements widened 8x to i64, so memory
// forms read only an eighth of a vector (i16/i32/i64; CD8VO disp8 scaling).
// 128-bit form uses InVecNode, wider forms use OpNode.
multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
          SDNode OpNode, SDNode InVecNode, string ExtTy,
          X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v16i8x_info, i16mem, LdFrag, InVecNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v16i8x_info, i32mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v16i8x_info, i64mem, LdFrag, OpNode>,
                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9252
// w->d extension (VPMOV{S,Z}XWD): i16 elements widened 2x to i32, so memory
// forms read a half vector (i64/i128/i256; CD8VH disp8 scaling). 128-bit
// form uses InVecNode, wider forms use OpNode.
multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
                   v8i16x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
                   v16i16x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9271
// w->q extension (VPMOV{S,Z}XWQ): i16 elements widened 4x to i64, so memory
// forms read a quarter vector (i32/i64/i128; CD8VQ disp8 scaling). 128-bit
// form uses InVecNode, wider forms use OpNode.
multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v8i16x_info, i32mem, LdFrag, InVecNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v8i16x_info, i64mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i16x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
  }
}
9290
// d->q extension (VPMOV{S,Z}XDQ): i32 elements widened 2x to i64, so memory
// forms read a half vector (i64/i128/i256; CD8VH disp8 scaling). 128-bit
// form uses InVecNode, wider forms use OpNode. Note: no VEX_WIG here,
// unlike the byte/word-source variants above.
multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
         SDNode OpNode, SDNode InVecNode, string ExtTy,
         X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {

  let Predicates = [HasVLX, HasAVX512] in {
    defm Z128:  WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
                   v4i32x_info, i64mem, LdFrag, InVecNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;

    defm Z256:  WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
                   v4i32x_info, i128mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
  }
  let Predicates = [HasAVX512] in {
    defm Z   :  WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
                   v8i32x_info, i256mem, LdFrag, OpNode>,
                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
  }
}
9310
// Zero-extension instructions: X86vzext for full-width forms, zext_invec
// for the 128-bit in-vector forms, "z" selecting zextload fragments.
defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", WriteShuffle256>;

// Sign-extension instructions: X86vsext / sext_invec, "s" selecting
// sextload fragments.
defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", WriteShuffle256>;
9324
9325
// Extra selection patterns for the VPMOVSX*/VPMOVZX* memory forms: match
// extensions whose source vector was produced by a scalar_to_vector load,
// a zero-extending move/load fragment (vzmovl_*/vzload_*), or a bitcast of
// a plain vector load, and fold the load into the instruction. InVecOp
// (sext_invec/zext_invec) covers the 128-bit in-vector forms; ExtOp
// (X86vsext/X86vzext) covers the full-width 256/512-bit forms.
multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> {
  // 128-bit patterns
  // b->w results need BWI in addition to VLX.
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
  }
  // Remaining 128-bit forms (b->d, b->q, w->d, w->q, d->q) only need VLX.
  let Predicates = [HasVLX] in {
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;

  // b->q reads only 16 bits, hence the extloadi32i16 fragment.
  def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;

  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
  def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;

  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
  }
  // 256-bit patterns
  let Predicates = [HasVLX, HasBWI] in {
  def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
  }
  let Predicates = [HasVLX] in {
  def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;

  def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
  }
  // 512-bit patterns
  let Predicates = [HasBWI] in {
  def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
  }
  let Predicates = [HasAVX512] in {
  def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
  def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;

  def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;

  def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
  }
}
9466
// Instantiate the load-folding patterns for both sign and zero extension.
defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec>;
defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec>;
9469
9470//===----------------------------------------------------------------------===//
9471// GATHER - SCATTER Operations
9472
9473// FIXME: Improve scheduling of gather/scatter instructions.
// A single masked gather instruction. $dst is tied to $src1 (the merge /
// pass-through values) and marked earlyclobber; the writemask is both read
// and written back through $mask_wb (per the Intel SDM, the hardware clears
// mask bits as elements are gathered, which is why the updated mask is an
// output). The index vector comes from the `vectoraddr` memory operand.
// MaskRC defaults to the writemask class of the data type but can be
// overridden (the d_ps 128-bit Q-indexed form passes VK2WM).
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         X86MemOperand memop, PatFrag GatherNode,
                         RegisterClass MaskRC = _.KRCWM> {
  let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
      ExeDomain = _.ExeDomain in
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
            (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
            [(set _.RC:$dst, MaskRC:$mask_wb,
              (GatherNode  (_.VT _.RC:$src1), MaskRC:$mask,
                     vectoraddr:$src2))]>, EVEX, EVEX_K,
             EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}
9488
// Gathers of 64-bit data elements (the PD / Q-data flavors): both the
// dword-indexed (dopc, "...d") and qword-indexed (qopc, "...q") forms at
// 512/256/128-bit widths. 128/256-bit forms require VLX. All forms carry
// VEX_W for the 64-bit element size.
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
                                      vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
                                      vz512mem,  mgatherv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                              vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
                              vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                              vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                              vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
}
}
9506
// Gathers of 32-bit data elements (the PS / D-data flavors). The
// qword-indexed forms use the next-smaller data VT at each EVEX vector
// length (e.g. _.info256 under EVEX_V512) because 64-bit indices halve the
// number of gathered 32-bit elements; the 128-bit Q form therefore also
// overrides the mask class to VK2WM. 128/256-bit forms require VLX.
multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mgatherv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mgatherv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mgatherv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mgatherv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mgatherv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mgatherv2i64, VK2WM>,
                                          EVEX_V128;
}
}
9525
9526
// Floating-point gathers (VGATHERDPS/QPS/DPD/QPD) and integer gathers
// (VPGATHERDD/QD/DQ/QQ).
defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
               avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;

defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
                avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
9532
// A single masked scatter instruction: stores vector $src to the addresses
// described by the `vectoraddr` destination operand under writemask $mask.
// As with gather, the mask is read and written back through $mask_wb (per
// the Intel SDM, mask bits are cleared as elements are stored).
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag ScatterNode,
                          RegisterClass MaskRC = _.KRCWM> {

let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in

  def mr  : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
            (ins memop:$dst, MaskRC:$mask, _.RC:$src),
            !strconcat(OpcodeStr#_.Suffix,
            "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
            [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
                                    MaskRC:$mask,  vectoraddr:$dst))]>,
            EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[WriteStore]>;
}
9548
// Scatters of 64-bit data elements; mirrors avx512_gather_q_pd. Dword- and
// qword-indexed forms at 512/256/128 bits, all with VEX_W; 128/256-bit
// forms require VLX.
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
                        AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
                                      vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
                                      vz512mem,  mscatterv8i64>, EVEX_V512, VEX_W;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                              vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
                              vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                              vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                              vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
}
}
9566
// Scatters of 32-bit data elements; mirrors avx512_gather_d_ps, including
// the qword-indexed forms using the next-smaller data VT at each vector
// length (64-bit indices halve the 32-bit element count) and the VK2WM
// mask override for the 128-bit Q form. 128/256-bit forms require VLX.
multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
                       AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
  defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
                                       mscatterv16i32>, EVEX_V512;
  defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
                                       mscatterv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
  defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
                                          vy256xmem, mscatterv8i32>, EVEX_V256;
  defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                          vy128xmem, mscatterv4i64>, EVEX_V256;
  defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
                                          vx128xmem, mscatterv4i32>, EVEX_V128;
  defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
                                          vx64xmem, mscatterv2i64, VK2WM>,
                                          EVEX_V128;
}
}
9585
// Floating-point scatters (VSCATTERDPS/QPS/DPD/QPD) and integer scatters
// (VPSCATTERDD/QD/DQ/QQ).
defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
               avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;

defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
                avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
9591
// Gather/scatter prefetch instructions (AVX-512 PF feature).
// One gather/scatter prefetch instruction (AVX-512 PF feature): takes a
// writemask and a vector memory operand but produces no result, so the
// pattern list is empty and both mayLoad and mayStore are set
// conservatively.
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                       RegisterClass KRC, X86MemOperand memop> {
  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
  def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
            !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
            EVEX, EVEX_K, Sched<[WriteLoad]>;
}
9600
// Prefetch instantiations. The opcode byte selects gather (0xC6/0xC7 for
// dword/qword indices) and the ModRM reg field (MRM1m/MRM2m = gather hint
// T0/T1, MRM5m/MRM6m = scatter hint T0/T1) selects the variant. QPS forms
// use a half-width mask (VK8WM with a 256-bit index operand) because qword
// indices halve the element count; PD forms carry VEX_W.
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
                     VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
                     VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;

defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
                     VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;

defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
                     VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9648
// VPMOVM2* register form at one vector width: sign-extend a mask register
// into a full vector (each destination element becomes all-ones or
// all-zeros according to its mask bit).
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
                  !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
                  [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
                  EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
}
9655
// Instantiates the mask-to-vector conversion at all three vector widths:
// 512-bit under the base predicate, 128/256-bit additionally under VLX.
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
                                 string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
  }
}
9666
// VPMOVM2{B,W,D,Q}: mask register -> vector of all-ones/all-zeros elements.
// Byte/word forms require BWI; dword/qword forms require DQI and add VEX_W.
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
9671
// Vector-to-mask move (vpmov*2m): selected for (0 > x), i.e. a signed
// compare of each element against zero, which is true exactly when the
// element's sign bit is set.
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
    def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
                        EVEX, Sched<[WriteMove]>;
}
9678
// Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow source is inserted into the low lanes of an undef 512-bit
// register (INSERT_SUBREG of IMPLICIT_DEF), the 512-bit instruction
// (Name#"Zrr") is run on it, and the resulting mask is copied to the
// narrower mask register class of the original type.
multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                           X86VectorVTInfo _,
                                           string Name> {

  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
            (_.KVT (COPY_TO_REGCLASS
                     (!cast<Instruction>(Name#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
                                      _.RC:$src, _.SubRegIdx)),
                   _.KRC))>;
}
9691
// Instantiate the vector-to-mask instruction at all widths: 512-bit under
// prd, native 256/128-bit forms under VLX, and (mutually exclusive with the
// native forms) NoVLX fallback patterns that widen to the 512-bit version.
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
                                   AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
                                            EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
                                              EVEX_V256;
    defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
                                               EVEX_V128;
  }
  let Predicates = [prd, NoVLX] in {
    defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
    defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
  }
}
9709
// Vector-to-mask moves, mirroring the VPMOVM2* layout above: opcode 0x29
// for byte/word (BWI), 0x39 for dword/qword (DQI), VEX_W for the wider
// element of each pair.
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
                                              avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
                                              avx512vl_i16_info, HasBWI>, VEX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
                                              avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
                                              avx512vl_i64_info, HasDQI>, VEX_W;
9718
// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
// a target independent DAG combine likes to combine sext and trunc.
// Implemented as: sign-extend the mask into v16i32 with VPMOVM2D (DQI),
// then narrow the i32 elements to i8/i16 with VPMOVDB/VPMOVDW.
let Predicates = [HasDQI, NoBWI] in {
  def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
            (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
  def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
            (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
}
9728
9729//===----------------------------------------------------------------------===//
9730// AVX-512 - COMPRESS and EXPAND
9731//
9732
// COMPRESS at one vector width:
//  - rr:  maskable register form with an X86compress pattern.
//  - mr:  unmasked store form, no pattern (selection is done elsewhere).
//  - mrk: masked store form, no pattern; matched by
//         compress_by_vec_width_lowering below.
multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86compress _.RC:$src1))>, AVX5128IBase,
              Sched<[sched]>;

  let mayStore = 1, hasSideEffects = 0 in
  def mr : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.RC:$src),
              OpcodeStr # "\t{$src, $dst|$dst, $src}",
              []>, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;

  // NOTE(review): the mayStore/hasSideEffects let above covers only 'mr';
  // 'mrk' also stores to memory — confirm whether it should be under the
  // same let.
  def mrk : AVX5128I<opc, MRMDestMem, (outs),
              (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
              OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
              []>,
              EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded]>;
}
9754
// Select a masked compressing store through the 'mrk' instruction defined
// in compress_by_vec_width_common.
multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
  def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
                                               (_.VT _.RC:$src)),
            (!cast<Instruction>(Name#_.ZSuffix##mrk)
                            addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
}
9761
// Instantiate the COMPRESS instructions and their store-lowering patterns
// at all three widths; 128/256-bit forms require VLX on top of Pred.
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched,
                                 AVX512VLVectorVTInfo VTInfo,
                                 Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
           compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
                compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
9777
// FIXME: Is there a better scheduler class for VPCOMPRESS?
// Opcode 0x8B is the integer (vpcompress) form, 0x8A the FP (vcompress)
// form; VEX_W selects the 64-bit-element variants.
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
                                          avx512vl_i32_info>, EVEX, NotMemoryFoldable;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
                                          avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
                                          avx512vl_f32_info>, EVEX, NotMemoryFoldable;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
                                          avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
9787
// EXPAND at one vector width: maskable register form (rr) and maskable
// load form (rm), both with X86expand patterns.
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
                                 string OpcodeStr, X86FoldableSchedWrite sched> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
              (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand _.RC:$src1))>, AVX5128IBase,
              Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
              (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
              (_.VT (X86expand (_.VT (bitconvert
                                      (_.LdFrag addr:$src1)))))>,
            AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
            Sched<[sched.Folded, ReadAfterLd]>;
}
9803
// Select masked expanding loads through the instructions defined in
// expand_by_vec_width. An undef passthru is handled with the zero-masking
// form (rmkz), same as an all-zeros passthru; a register passthru uses the
// merge-masking form (rmk).
multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
            (!cast<Instruction>(Name#_.ZSuffix##rmkz)
                                        _.KRCWM:$mask, addr:$src)>;

  def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
                                               (_.VT _.RC:$src0))),
            (!cast<Instruction>(Name#_.ZSuffix##rmk)
                            _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
}
9819
// Instantiate the EXPAND instructions and their load-lowering patterns at
// all three widths; 128/256-bit forms require VLX on top of Pred.
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in
  defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
           expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;

  let Predicates = [Pred, HasVLX] in {
    defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
    defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
                expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
  }
}
9835
// FIXME: Is there a better scheduler class for VPEXPAND?
// Opcode 0x89 is the integer (vpexpand) form, 0x88 the FP (vexpand) form;
// VEX_W selects the 64-bit-element variants.
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
                                      avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
                                      avx512vl_i64_info>, EVEX, VEX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
                                      avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
                                      avx512vl_f64_info>, EVEX, VEX_W;
9845
// Handle instruction  reg_vec1 = op(reg_vec, imm)
//                                op(mem_vec, imm)
//                                op(broadcast(eltVt), imm)
// All instructions created with the current rounding mode (FROUND_CURRENT);
// the {sae} variant is added separately by avx512_unary_fp_sae_packed_imm.
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2))>, Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.MemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
                            (i32 imm:$src2))>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
                    OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
                    "${src1}"##_.BroadcastStr##", $src2",
                    (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
                            (i32 imm:$src2))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}
9873
// Handle instruction  reg_vec1 = op(reg_vec, imm), {sae}
// Register-only form that suppresses all FP exceptions (FROUND_NO_EXC),
// encoded via EVEX_B.
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                          SDNode OpNode, X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, i32u8imm:$src2),
                      OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
                      "$src1, {sae}, $src2",
                      (OpNode (_.VT _.RC:$src1),
                              (i32 imm:$src2),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}
9888
// Width driver for the unary FP+imm instructions. The 512-bit form gets
// both the normal and the {sae} variant; the VLX 128/256-bit forms only
// get the normal variant (no {sae} encoding at those widths here).
multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
                                           _.info512>,
                avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
                                               sched.ZMM, _.info512>, EVEX_V512;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
                                           _.info128>, EVEX_V128;
    defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
                                           _.info256>, EVEX_V256;
  }
}
9905
// Handle instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                op(reg_vec2, mem_vec, imm)
//                                op(reg_vec2, broadcast(eltVt), imm)
// All instructions created with the current rounding mode (FROUND_CURRENT);
// the {sae} variant is added separately by avx512_fp_sae_packed_imm.
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (bitconvert (_.LdFrag addr:$src2))),
                            (i32 imm:$src3))>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}
9937
// Handle instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                op(reg_vec2, mem_vec, imm)
// Unlike the FP variants above this takes an i8 immediate and allows the
// destination and source vector types to differ (DestInfo vs SrcInfo).
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
                              X86VectorVTInfo SrcInfo>{
  let ExeDomain = DestInfo.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
                  (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                               (SrcInfo.VT SrcInfo.RC:$src2),
                               (i8 imm:$src3)))>,
                  Sched<[sched]>;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
                (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
                             (SrcInfo.VT (bitconvert
                                                (SrcInfo.LdFrag addr:$src2))),
                             (i8 imm:$src3)))>,
                Sched<[sched.Folded, ReadAfterLd]>;
  }
}
9961
// Handle instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                op(reg_vec2, mem_vec, imm)
//                                op(reg_vec2, broadcast(eltVt), imm)
// Inherits the rri/rmi forms from avx512_3Op_rm_imm8 (with matching
// source/dest types) and adds the broadcast-memory form (rmbi).
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _>:
  avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{

  let ExeDomain = _.ExeDomain in
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                            (i8 imm:$src3))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
}
9979
// Handle scalar instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm)
//                                       op(reg_vec2, mem_scalar, imm)
// Scalar counterpart of avx512_fp_packed_imm; the memory form loads a
// single element and wraps it with scalar_to_vector for the pattern.
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3))>,
                      Sched<[sched]>;
  defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
                    OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                    (OpNode (_.VT _.RC:$src1),
                            (_.VT (scalar_to_vector
                                      (_.ScalarLdFrag addr:$src2))),
                            (i32 imm:$src3))>,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}
10002
// Handle instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// Register-only form that suppresses all FP exceptions (FROUND_NO_EXC),
// encoded via EVEX_B.
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode, X86FoldableSchedWrite sched,
                                    X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}
10018
// Handle scalar instruction  reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// Scalar counterpart of avx512_fp_sae_packed_imm.
// NOTE(review): the explicit NAME# prefix here looks redundant — inside a
// multiclass the enclosing defm name is already prepended, and the sibling
// avx512_fp_sae_packed_imm uses plain 'rrib'. Confirm the intended final
// instruction name before changing it.
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                    X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in
  defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
                      OpcodeStr, "$src3, {sae}, $src2, $src1",
                      "$src1, $src2, {sae}, $src3",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (i32 imm:$src3),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
}
10033
// Width driver for the binary FP+imm instructions. The 512-bit form gets
// both the normal and the {sae} variant; the VLX 128/256-bit forms only
// get the normal variant.
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
            AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
            SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  let Predicates = [prd] in {
    defm Z    : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512>,
                                  EVEX_V512;

  }
  let Predicates = [prd, HasVLX] in {
    defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                  EVEX_V128;
    defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                  EVEX_V256;
  }
}
10050
// Width driver for avx512_3Op_rm_imm8 (distinct dest/src type infos).
// Defaults to HasBWI; 128/256-bit forms additionally require VLX.
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
                   X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
                   AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
                           SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
                           SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
    defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
                           SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
  }
}
10065
// Width driver for avx512_3Op_imm8 (same dest/src type, includes the
// broadcast form). Defaults to HasAVX512; 128/256-bit forms require VLX.
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
                                  bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
                                  Predicate Pred = HasAVX512> {
  let Predicates = [Pred] in {
    defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
                                EVEX_V512;
  }
  let Predicates = [Pred, HasVLX] in {
    defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
                                EVEX_V128;
    defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
                                EVEX_V256;
  }
}
10080
// Driver for the scalar FP+imm instructions: one XMM-scheduled instance
// combining the normal and {sae} forms (scalars have no wider variants).
multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
                  X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
                  SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd> {
  let Predicates = [prd] in {
     defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
              avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched.XMM, _>;
  }
}
10089
// Instantiate a unary FP+imm instruction for both element sizes: a PS
// (f32) variant at opcPs and a PD (f64, VEX_W) variant at opcPd.
multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
                    bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
                    SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
  defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
                            opcPs, OpNode, OpNodeRnd, sched, prd>,
                            EVEX_CD8<32, CD8VF>;
  defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
                            opcPd, OpNode, OpNodeRnd, sched, prd>,
                            EVEX_CD8<64, CD8VF>, VEX_W;
}
10100
// Packed unary FP+imm instructions. VREDUCE and VGETMANT use the same
// opcode for PS and PD; VRNDSCALE uses 0x08 (ps) / 0x09 (pd).
defm VREDUCE   : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
                              X86VReduce, X86VReduceRnd, SchedWriteFRnd, HasDQI>,
                              AVX512AIi8Base, EVEX;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
                              X86VRndScale, X86VRndScaleRnd, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
                              X86VGetMant, X86VGetMantRnd, SchedWriteFRnd, HasAVX512>,
                              AVX512AIi8Base, EVEX;
10110
// Binary FP+imm instructions: packed VRANGE (DQI) plus the scalar range,
// reduce, and getmant forms. The pd/sd variants carry VEX_W and 64-bit
// CD8 scaling; the ps/ss variants use 32-bit CD8 scaling.
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
                                                0x50, X86VRange, X86VRangeRnd,
                                                SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;

defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
      f64x_info, 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
      0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
      0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
      0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;

defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
      0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
      0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
      AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10140
10141
10142multiclass AVX512_rndscale_lowering<X86VectorVTInfo _, string Suffix> {
10143  // Register
10144  def : Pat<(_.VT (ffloor _.RC:$src)),
10145            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
10146             _.RC:$src, (i32 0x9))>;
10147  def : Pat<(_.VT (fnearbyint _.RC:$src)),
10148            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
10149             _.RC:$src, (i32 0xC))>;
10150  def : Pat<(_.VT (fceil _.RC:$src)),
10151            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
10152             _.RC:$src, (i32 0xA))>;
10153  def : Pat<(_.VT (frint _.RC:$src)),
10154            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
10155             _.RC:$src, (i32 0x4))>;
10156  def : Pat<(_.VT (ftrunc _.RC:$src)),
10157            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
10158             _.RC:$src, (i32 0xB))>;
10159
10160  // Merge-masking
10161  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src), _.RC:$dst)),
10162            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
10163             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
10164  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src), _.RC:$dst)),
10165            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
10166             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
10167  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src), _.RC:$dst)),
10168            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
10169             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
10170  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src), _.RC:$dst)),
10171            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
10172             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
10173  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src), _.RC:$dst)),
10174            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
10175             _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;
10176
10177  // Zero-masking
10178  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src),
10179                           _.ImmAllZerosV)),
10180            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
10181             _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
10182  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src),
10183                           _.ImmAllZerosV)),
10184            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
10185             _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
10186  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src),
10187                           _.ImmAllZerosV)),
10188            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
10189             _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
10190  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src),
10191                           _.ImmAllZerosV)),
10192            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
10193             _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
10194  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src),
10195                           _.ImmAllZerosV)),
10196            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
10197             _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;
10198
10199  // Load
10200  def : Pat<(_.VT (ffloor (_.LdFrag addr:$src))),
10201            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
10202             addr:$src, (i32 0x9))>;
10203  def : Pat<(_.VT (fnearbyint (_.LdFrag addr:$src))),
10204            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
10205             addr:$src, (i32 0xC))>;
10206  def : Pat<(_.VT (fceil (_.LdFrag addr:$src))),
10207            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
10208             addr:$src, (i32 0xA))>;
10209  def : Pat<(_.VT (frint (_.LdFrag addr:$src))),
10210            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
10211             addr:$src, (i32 0x4))>;
10212  def : Pat<(_.VT (ftrunc (_.LdFrag addr:$src))),
10213            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
10214             addr:$src, (i32 0xB))>;
10215
10216  // Merge-masking + load
10217  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
10218                           _.RC:$dst)),
10219            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
10220             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
10221  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
10222                           _.RC:$dst)),
10223            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
10224             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
10225  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
10226                           _.RC:$dst)),
10227            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
10228             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
10229  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
10230                           _.RC:$dst)),
10231            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
10232             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
10233  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
10234                           _.RC:$dst)),
10235            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
10236             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;
10237
10238  // Zero-masking + load
10239  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
10240                           _.ImmAllZerosV)),
10241            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
10242             _.KRCWM:$mask, addr:$src, (i32 0x9))>;
10243  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
10244                           _.ImmAllZerosV)),
10245            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
10246             _.KRCWM:$mask, addr:$src, (i32 0xC))>;
10247  def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
10248                           _.ImmAllZerosV)),
10249            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
10250             _.KRCWM:$mask, addr:$src, (i32 0xA))>;
10251  def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
10252                           _.ImmAllZerosV)),
10253            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
10254             _.KRCWM:$mask, addr:$src, (i32 0x4))>;
10255  def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
10256                           _.ImmAllZerosV)),
10257            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
10258             _.KRCWM:$mask, addr:$src, (i32 0xB))>;
10259
10260  // Broadcast load
10261  def : Pat<(_.VT (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
10262            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
10263             addr:$src, (i32 0x9))>;
10264  def : Pat<(_.VT (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
10265            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
10266             addr:$src, (i32 0xC))>;
10267  def : Pat<(_.VT (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
10268            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
10269             addr:$src, (i32 0xA))>;
10270  def : Pat<(_.VT (frint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
10271            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
10272             addr:$src, (i32 0x4))>;
10273  def : Pat<(_.VT (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
10274            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
10275             addr:$src, (i32 0xB))>;
10276
10277  // Merge-masking + broadcast load
10278  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10279                           (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10280                           _.RC:$dst)),
10281            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
10282             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
10283  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10284                           (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10285                           _.RC:$dst)),
10286            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
10287             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
10288  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10289                           (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10290                           _.RC:$dst)),
10291            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
10292             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
10293  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10294                           (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10295                           _.RC:$dst)),
10296            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
10297             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
10298  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10299                           (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10300                           _.RC:$dst)),
10301            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
10302             _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;
10303
10304  // Zero-masking + broadcast load
10305  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10306                           (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10307                           _.ImmAllZerosV)),
10308            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
10309             _.KRCWM:$mask, addr:$src, (i32 0x9))>;
10310  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10311                           (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10312                           _.ImmAllZerosV)),
10313            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
10314             _.KRCWM:$mask, addr:$src, (i32 0xC))>;
10315  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10316                           (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10317                           _.ImmAllZerosV)),
10318            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
10319             _.KRCWM:$mask, addr:$src, (i32 0xA))>;
10320  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10321                           (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10322                           _.ImmAllZerosV)),
10323            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
10324             _.KRCWM:$mask, addr:$src, (i32 0x4))>;
10325  def : Pat<(_.VT (vselect _.KRCWM:$mask,
10326                           (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
10327                           _.ImmAllZerosV)),
10328            (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
10329             _.KRCWM:$mask, addr:$src, (i32 0xB))>;
10330}
10331
// Instantiate the VRNDSCALE lowering patterns (ffloor/fceil/ftrunc/frint/
// fnearbyint -> VRNDSCALEPS/PD with the corresponding imm8) for the 512-bit
// types. These only need AVX512F.
let Predicates = [HasAVX512] in {
  defm : AVX512_rndscale_lowering<v16f32_info, "PS">;
  defm : AVX512_rndscale_lowering<v8f64_info,  "PD">;
}

// The 256-bit and 128-bit EVEX forms additionally require VLX.
let Predicates = [HasVLX] in {
  defm : AVX512_rndscale_lowering<v8f32x_info, "PS">;
  defm : AVX512_rndscale_lowering<v4f64x_info, "PD">;
  defm : AVX512_rndscale_lowering<v4f32x_info, "PS">;
  defm : AVX512_rndscale_lowering<v2f64x_info, "PD">;
}
10343
// 128-bit-lane shuffle (VSHUFF32X4 etc.) for a single vector width.
// The X86Shuf128 node operates on CastInfo's type (64-bit or 128-bit lane
// granularity), so each pattern wraps the shuffle in a bitconvert back to the
// instruction's own type _.VT. EVEX2VEXOvrd names the VEX instruction base
// used by the EVEX->VEX compression pass for the reg and load forms.
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
                                          X86FoldableSchedWrite sched,
                                          X86VectorVTInfo _,
                                          X86VectorVTInfo CastInfo,
                                          string EVEX2VEXOvrd> {
  let ExeDomain = _.ExeDomain in {
  // Register-register form.
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (bitconvert
                         (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
                                                  (i8 imm:$src3)))))>,
                  Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
  // Register-memory form: second source folded from a full-width load.
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT
                 (bitconvert
                  (CastInfo.VT (X86Shuf128 _.RC:$src1,
                                           (bitconvert (_.LdFrag addr:$src2)),
                                           (i8 imm:$src3)))))>,
                Sched<[sched.Folded, ReadAfterLd]>,
                EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
  // Broadcast-memory form (EVEX.b): second source is a broadcast scalar load.
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                    (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                    OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                    "$src1, ${src2}"##_.BroadcastStr##", $src3",
                    (_.VT
                     (bitconvert
                      (CastInfo.VT
                       (X86Shuf128 _.RC:$src1,
                                   (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                   (i8 imm:$src3)))))>, EVEX_B,
                    Sched<[sched.Folded, ReadAfterLd]>;
  }
}
10380
// Instantiates the 128-bit-lane shuffle at 512-bit (AVX512F) and 256-bit
// (AVX512F+VLX) widths. There is no 128-bit form: a single lane has nothing
// to shuffle. The 512-bit variant passes an empty EVEX2VEX override name
// since only the 256-bit form can be compressed to a VEX VPERM2F128/I128.
multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
                                   AVX512VLVectorVTInfo _,
                                   AVX512VLVectorVTInfo CastInfo, bits<8> opc,
                                   string EVEX2VEXOvrd>{
  let Predicates = [HasAVX512] in
  defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                          _.info512, CastInfo.info512, "">, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in
  defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
                                             _.info256, CastInfo.info256,
                                             EVEX2VEXOvrd>, EVEX_V256;
}
10394
// VSHUFF32X4/VSHUFF64X2/VSHUFI32X4/VSHUFI64X2: shuffle 128-bit lanes.
// The CastInfo type has 64-bit elements in all cases so the X86Shuf128
// node always sees the same lane representation; the 32-bit-element
// variants differ only in mnemonic/disp8 scaling and write-mask granularity.
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
      avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
      avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
      avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
      avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10403
let Predicates = [HasAVX512] in {
// Provide fallback in case the load node that is used in the broadcast
// patterns above is used by additional users, which prevents the pattern
// selection.
// Each pattern widens the 128-bit source into a zmm register (via
// INSERT_SUBREG on IMPLICIT_DEF) and uses VSHUFF/I with immediate 0,
// which replicates lane 0 into all four 128-bit lanes.
def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

// i16/i8 vectors reuse the i32 shuffle; lane replication is element-size
// agnostic so no BWI instruction is needed here.
def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;

def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                          0)>;
}
10436
// VALIGND/VALIGNQ for one vector width: concatenate src2:src1 and extract a
// vector starting at the element offset given by imm8 (X86VAlign node).
// Provides rri, rmi (full load) and rmbi (EVEX.b broadcast load) forms.
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched, X86VectorVTInfo _>{
  // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
  // instantiation of this class.
  let ExeDomain = _.ExeDomain in {
  defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                  (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
                  OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                  (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>,
                  Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
  defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
                OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                (_.VT (X86VAlign _.RC:$src1,
                                 (bitconvert (_.LdFrag addr:$src2)),
                                 (i8 imm:$src3)))>,
                Sched<[sched.Folded, ReadAfterLd]>,
                EVEX2VEXOverride<"VPALIGNRrmi">;

  // Broadcast form: second operand is a scalar load splatted to _.VT.
  defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                   (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
                   OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
                   "$src1, ${src2}"##_.BroadcastStr##", $src3",
                   (X86VAlign _.RC:$src1,
                              (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
                              (i8 imm:$src3))>, EVEX_B,
                   Sched<[sched.Folded, ReadAfterLd]>;
  }
}
10466
// Instantiates VALIGN at all three widths: 512-bit under AVX512F, 128/256-bit
// under AVX512F+VLX.
multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
                                AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    defm Z    : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V512;
  }
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V128;
    // We can't really override the 256-bit version so change it back to unset.
    let EVEX2VEXOverride = ? in
    defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
                                AVX512AIi8Base, EVEX_4V, EVEX_V256;
  }
}
10482
// VALIGND/VALIGNQ: element-granular concatenate-and-shift (AVX512F).
defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
                                   avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
                                   avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
                                   VEX_W;

// VPALIGNR: byte-granular align, per 128-bit lane (AVX512BW semantics come
// from the common 3-op multiclass instantiated with the i8 type infos).
defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
                                         SchedWriteShuffle, avx512vl_i8_info,
                                         avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10492
// Fragments to help convert valignq into masked valignd. Or valignq/valignd
// into vpalignr.
// Each transform rescales the element-offset immediate for the narrower
// element type: qword->dword offsets double, qword->byte multiply by 8,
// dword->byte multiply by 4.
def ValignqImm32XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
}]>;
def ValignqImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
}]>;
def ValigndImm8XForm : SDNodeXForm<imm, [{
  return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
}]>;
10504
// Rewrites a masked select of a bitconverted align (element type From) into
// the masked form of the instruction named OpcodeStr, whose mask granularity
// matches To. This lets a valignq under a dword/byte mask be emitted as a
// masked valignd/vpalignr. ImmXForm rescales the immediate between element
// sizes. Covers merge-masked (rrik/rmik) and zero-masked (rrikz/rmikz),
// register and load forms.
multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
                                        X86VectorVTInfo From, X86VectorVTInfo To,
                                        SDNodeXForm ImmXForm> {
  // Merge-masking, register sources.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, To.RC:$src2,
                                                  (ImmXForm imm:$src3))>;

  // Zero-masking, register sources.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1, From.RC:$src2,
                                              imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
                                                   To.RC:$src1, To.RC:$src2,
                                                   (ImmXForm imm:$src3))>;

  // Merge-masking with folded load (load typed for To's element size).
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert (To.LdFrag addr:$src2)),
                                      imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
                                                  To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  // Zero-masking with folded load.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert (To.LdFrag addr:$src2)),
                                      imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;
}
10546
// Extends avx512_vpalign_mask_lowering with broadcast-load (rmbi/rmbik/
// rmbikz) patterns: the second source is a To-element scalar load broadcast
// and bitconverted to From's type. Only used for VALIGND targets, which have
// a broadcast form (VPALIGNR does not).
multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
                                           X86VectorVTInfo From,
                                           X86VectorVTInfo To,
                                           SDNodeXForm ImmXForm> :
      avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
  // Unmasked broadcast form.
  // NOTE(review): the pattern binds From.RC:$src1 but the output uses
  // To.RC:$src1 — this works because From.RC and To.RC are the same
  // register class at each width; confirm if ever instantiated otherwise.
  def : Pat<(From.VT (OpNode From.RC:$src1,
                             (bitconvert (To.VT (X86VBroadcast
                                                (To.ScalarLdFrag addr:$src2)))),
                             imm:$src3)),
            (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
                                                  (ImmXForm imm:$src3))>;

  // Merge-masked broadcast form.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (X86VBroadcast
                                               (To.ScalarLdFrag addr:$src2)))),
                                      imm:$src3))),
                            To.RC:$src0)),
            (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
                                                   To.RC:$src1, addr:$src2,
                                                   (ImmXForm imm:$src3))>;

  // Zero-masked broadcast form.
  def : Pat<(To.VT (vselect To.KRCWM:$mask,
                            (bitconvert
                             (From.VT (OpNode From.RC:$src1,
                                      (bitconvert
                                       (To.VT (X86VBroadcast
                                               (To.ScalarLdFrag addr:$src2)))),
                                      imm:$src3))),
                            To.ImmAllZerosV)),
            (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
                                                    To.RC:$src1, addr:$src2,
                                                    (ImmXForm imm:$src3))>;
}
10583
let Predicates = [HasAVX512] in {
  // For 512-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
                                         v16i32_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX] in {
  // For 128-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
                                         v4i32x_info, ValignqImm32XForm>;
  // For 256-bit we lower to the widest element type we can. So we only need
  // to handle converting valignq to valignd.
  defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
                                         v8i32x_info, ValignqImm32XForm>;
}

let Predicates = [HasVLX, HasBWI] in {
  // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
  // Non-_mb variant: VPALIGNR has no broadcast form.
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
                                      v16i8x_info, ValignqImm8XForm>;
  defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
                                      v16i8x_info, ValigndImm8XForm>;
}
10609
// VDBPSADBW: double-block packed sum of absolute differences; i16 result
// elements from i8 sources, no VEX equivalent (NotEVEX2VEXConvertible).
defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
                SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
                EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
10613
// Maskable unary operation with register (rr) and full-width memory (rm)
// forms for a single vector width.
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain in {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                    (ins _.RC:$src1), OpcodeStr,
                    "$src1", "$src1",
                    (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase,
                    Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.MemOp:$src1), OpcodeStr,
                  "$src1", "$src1",
                  (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
            EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
            Sched<[sched.Folded]>;
  }
}
10631
// Adds the broadcast-memory (rmb, EVEX.b) form on top of avx512_unary_rm.
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86FoldableSchedWrite sched, X86VectorVTInfo _> :
           avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                  (ins _.ScalarMemOp:$src1), OpcodeStr,
                  "${src1}"##_.BroadcastStr,
                  "${src1}"##_.BroadcastStr,
                  (_.VT (OpNode (X86VBroadcast
                                    (_.ScalarLdFrag addr:$src1))))>,
             EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded]>;
}
10644
// Instantiates avx512_unary_rm at 512-bit under prd and at 256/128-bit
// under prd+VLX.
multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              X86SchedWriteWidths sched,
                              AVX512VLVectorVTInfo VTInfo, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
                             EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                              EVEX_V256;
    defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                              EVEX_V128;
  }
}
10659
// Same as avx512_unary_rm_vl but with broadcast-memory forms (rmb) included.
multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
                               Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
                              EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
                                 EVEX_V256;
    defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
                                 EVEX_V128;
  }
}
10674
// Dword/qword element variants ("d"/"q" mnemonic suffix). Broadcast forms
// exist for 32/64-bit elements, hence the rmb multiclass.
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
                               avx512vl_i64_info, prd>, VEX_W;
  defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
                               avx512vl_i32_info, prd>;
}

// Byte/word element variants ("b"/"w" suffix). No broadcast forms for
// sub-dword elements, hence the plain rm multiclass.
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd> {
  defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
                              avx512vl_i16_info, prd>, VEX_WIG;
  defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
                              avx512vl_i8_info, prd>, VEX_WIG;
}
10692
// All four element widths: d/q variants under AVX512F, b/w under BWI.
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched> {
  defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
                                    HasAVX512>,
              avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
                                    HasBWI>;
}

// VPABS[BWDQ]: packed absolute value.
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
                                    SchedWriteVecALU>;
10705
// VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen the source into a zmm via INSERT_SUBREG on IMPLICIT_DEF, run the
// 512-bit VPABSQ, then extract the original-width subregister. The upper
// lanes compute garbage that is discarded by the extract.
let Predicates = [HasAVX512, NoVLX] in {
  def : Pat<(v4i64 (abs VR256X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
             sub_ymm)>;
  def : Pat<(v2i64 (abs VR128X:$src)),
            (EXTRACT_SUBREG
                (VPABSQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
             sub_xmm)>;
}
10719
// Use 512bit version to implement 128/256 bit.
// Generic form of the VPABS NoVLX trick above: under prd+NoVLX, widen the
// 256/128-bit operand into a 512-bit register, run the Z-suffixed
// instruction, and extract the original subregister.
multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
                                 AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd, NoVLX] in {
    def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info256.RC:$src1,
                                 _.info256.SubRegIdx)),
              _.info256.SubRegIdx)>;

    def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
              (EXTRACT_SUBREG
                (!cast<Instruction>(InstrStr # "Zrr")
                  (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
                                 _.info128.RC:$src1,
                                 _.info128.SubRegIdx)),
              _.info128.SubRegIdx)>;
  }
}
10741
// VPLZCNT[DQ]: count leading zeros per element (AVX512CD).
defm VPLZCNT    : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
                                        SchedWriteVecIMul, HasCDI>;

// FIXME: Is there a better scheduler class for VPCONFLICT?
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
                                        SchedWriteVecALU, HasCDI>;

// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;

//===---------------------------------------------------------------------===//
// Counts number of ones - VPOPCNTD and VPOPCNTQ
//===---------------------------------------------------------------------===//

// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
                                     SchedWriteVecALU, HasVPOPCNTDQ>;

// VPOPCNT: use the 512-bit version for 128/256-bit when VLX is unavailable.
defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
10763
10764//===---------------------------------------------------------------------===//
10765// Replicate Single FP - MOVSHDUP and MOVSLDUP
10766//===---------------------------------------------------------------------===//
10767
10768multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
10769                            X86SchedWriteWidths sched> {
10770  defm NAME:       avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
10771                                      avx512vl_f32_info, HasAVX512>, XS;
10772}
10773
10774defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
10775                                  SchedWriteFShuffle>;
10776defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
10777                                  SchedWriteFShuffle>;
10778
10779//===----------------------------------------------------------------------===//
10780// AVX-512 - MOVDDUP
10781//===----------------------------------------------------------------------===//
10782
10783multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
10784                              X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10785  let ExeDomain = _.ExeDomain in {
10786  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10787                   (ins _.RC:$src), OpcodeStr, "$src", "$src",
10788                   (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX,
10789                   Sched<[sched]>;
10790  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10791                 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
10792                 (_.VT (OpNode (_.VT (scalar_to_vector
10793                                       (_.ScalarLdFrag addr:$src)))))>,
10794                 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
10795                 Sched<[sched.Folded]>;
10796  }
10797}
10798
// 512/256-bit forms use the generic unary multiclass with X86Movddup; the
// 128-bit form matches X86VBroadcast since duplicating the low f64 of a
// v2f64 is a broadcast.
// NOTE(review): the OpNode parameter is not referenced in the body — the
// nodes are hardcoded per width; confirm whether the parameter is vestigial.
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
  defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
                           VTInfo.info512>, EVEX_V512;

  let Predicates = [HasAVX512, HasVLX] in {
    defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
                                VTInfo.info256>, EVEX_V256;
    defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM,
                                   VTInfo.info128>, EVEX_V128;
  }
}
10811
10812multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
10813                          X86SchedWriteWidths sched> {
10814  defm NAME:      avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
10815                                        avx512vl_f64_info>, XD, VEX_W;
10816}
10817
10818defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
10819
10820let Predicates = [HasVLX] in {
10821def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
10822          (VMOVDDUPZ128rm addr:$src)>;
10823def : Pat<(v2f64 (X86VBroadcast f64:$src)),
10824          (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10825def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
10826          (VMOVDDUPZ128rm addr:$src)>;
10827
10828def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10829                   (v2f64 VR128X:$src0)),
10830          (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
10831                           (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10832def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10833                   (bitconvert (v4i32 immAllZerosV))),
10834          (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10835
10836def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
10837                   (v2f64 VR128X:$src0)),
10838          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10839def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
10840                   (bitconvert (v4i32 immAllZerosV))),
10841          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
10842
10843def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
10844                   (v2f64 VR128X:$src0)),
10845          (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10846def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
10847                   (bitconvert (v4i32 immAllZerosV))),
10848          (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
10849}
10850
10851//===----------------------------------------------------------------------===//
10852// AVX-512 - Unpack Instructions
10853//===----------------------------------------------------------------------===//
10854
10855defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
10856                                 SchedWriteFShuffleSizes, 0, 1>;
10857defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
10858                                 SchedWriteFShuffleSizes>;
10859
10860defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
10861                                       SchedWriteShuffle, HasBWI>;
10862defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
10863                                       SchedWriteShuffle, HasBWI>;
10864defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
10865                                       SchedWriteShuffle, HasBWI>;
10866defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
10867                                       SchedWriteShuffle, HasBWI>;
10868
10869defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
10870                                       SchedWriteShuffle, HasAVX512>;
10871defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
10872                                       SchedWriteShuffle, HasAVX512>;
10873defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
10874                                        SchedWriteShuffle, HasAVX512>;
10875defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
10876                                        SchedWriteShuffle, HasAVX512>;
10877
10878//===----------------------------------------------------------------------===//
10879// AVX-512 - Extract & Insert Integer Instructions
10880//===----------------------------------------------------------------------===//
10881
10882multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10883                                                            X86VectorVTInfo _> {
10884  def mr : AVX512Ii8<opc, MRMDestMem, (outs),
10885              (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10886              OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10887              [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
10888                       addr:$dst)]>,
10889              EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
10890}
10891
10892multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
10893  let Predicates = [HasBWI] in {
10894    def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
10895                  (ins _.RC:$src1, u8imm:$src2),
10896                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10897                  [(set GR32orGR64:$dst,
10898                        (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
10899                  EVEX, TAPD, Sched<[WriteVecExtract]>;
10900
10901    defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
10902  }
10903}
10904
10905multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
10906  let Predicates = [HasBWI] in {
10907    def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
10908                  (ins _.RC:$src1, u8imm:$src2),
10909                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10910                  [(set GR32orGR64:$dst,
10911                        (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
10912                  EVEX, PD, Sched<[WriteVecExtract]>;
10913
10914    let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
10915    def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
10916                   (ins _.RC:$src1, u8imm:$src2),
10917                   OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
10918                   EVEX, TAPD, FoldGenData<NAME#rr>,
10919                   Sched<[WriteVecExtract]>;
10920
10921    defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
10922  }
10923}
10924
10925multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
10926                                                            RegisterClass GRC> {
10927  let Predicates = [HasDQI] in {
10928    def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
10929                  (ins _.RC:$src1, u8imm:$src2),
10930                  OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10931                  [(set GRC:$dst,
10932                      (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
10933                  EVEX, TAPD, Sched<[WriteVecExtract]>;
10934
10935    def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
10936                (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10937                OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10938                [(store (extractelt (_.VT _.RC:$src1),
10939                                    imm:$src2),addr:$dst)]>,
10940                EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
10941                Sched<[WriteVecExtractSt]>;
10942  }
10943}
10944
10945defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
10946defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
10947defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
10948defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
10949
10950multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10951                                            X86VectorVTInfo _, PatFrag LdFrag> {
10952  def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
10953      (ins _.RC:$src1,  _.ScalarMemOp:$src2, u8imm:$src3),
10954      OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10955      [(set _.RC:$dst,
10956          (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
10957      EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
10958}
10959
10960multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
10961                                            X86VectorVTInfo _, PatFrag LdFrag> {
10962  let Predicates = [HasBWI] in {
10963    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10964        (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
10965        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10966        [(set _.RC:$dst,
10967            (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
10968        Sched<[WriteVecInsert]>;
10969
10970    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
10971  }
10972}
10973
10974multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
10975                                         X86VectorVTInfo _, RegisterClass GRC> {
10976  let Predicates = [HasDQI] in {
10977    def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10978        (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
10979        OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10980        [(set _.RC:$dst,
10981            (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
10982        EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
10983
10984    defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
10985                                    _.ScalarLdFrag>, TAPD;
10986  }
10987}
10988
10989defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
10990                                     extloadi8>, TAPD, VEX_WIG;
10991defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
10992                                     extloadi16>, PD, VEX_WIG;
10993defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
10994defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
10995
10996//===----------------------------------------------------------------------===//
10997// VSHUFPS - VSHUFPD Operations
10998//===----------------------------------------------------------------------===//
10999
11000multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
11001                        AVX512VLVectorVTInfo VTInfo_FP>{
11002  defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11003                                    SchedWriteFShuffle>,
11004                                    EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11005                                    AVX512AIi8Base, EVEX_4V;
11006}
11007
11008defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
11009defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
11010
11011//===----------------------------------------------------------------------===//
11012// AVX-512 - Byte shift Left/Right
11013//===----------------------------------------------------------------------===//
11014
11015// FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
11016multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11017                               Format MRMm, string OpcodeStr,
11018                               X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11019  def rr : AVX512<opc, MRMr,
11020             (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11021             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11022             [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
11023             Sched<[sched]>;
11024  def rm : AVX512<opc, MRMm,
11025           (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11026           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11027           [(set _.RC:$dst,(_.VT (OpNode
11028                                 (_.VT (bitconvert (_.LdFrag addr:$src1))),
11029                                 (i8 imm:$src2))))]>,
11030           Sched<[sched.Folded, ReadAfterLd]>;
11031}
11032
11033multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11034                                   Format MRMm, string OpcodeStr,
11035                                   X86SchedWriteWidths sched, Predicate prd>{
11036  let Predicates = [prd] in
11037    defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11038                                 sched.ZMM, v64i8_info>, EVEX_V512;
11039  let Predicates = [prd, HasVLX] in {
11040    defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11041                                    sched.YMM, v32i8x_info>, EVEX_V256;
11042    defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11043                                    sched.XMM, v16i8x_info>, EVEX_V128;
11044  }
11045}
11046defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11047                                       SchedWriteShuffle, HasBWI>,
11048                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11049defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11050                                       SchedWriteShuffle, HasBWI>,
11051                                       AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11052
11053multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11054                                string OpcodeStr, X86FoldableSchedWrite sched,
11055                                X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11056  def rr : AVX512BI<opc, MRMSrcReg,
11057             (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11058             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11059             [(set _dst.RC:$dst,(_dst.VT
11060                                (OpNode (_src.VT _src.RC:$src1),
11061                                        (_src.VT _src.RC:$src2))))]>,
11062             Sched<[sched]>;
11063  def rm : AVX512BI<opc, MRMSrcMem,
11064           (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11065           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11066           [(set _dst.RC:$dst,(_dst.VT
11067                              (OpNode (_src.VT _src.RC:$src1),
11068                              (_src.VT (bitconvert
11069                                        (_src.LdFrag addr:$src2))))))]>,
11070           Sched<[sched.Folded, ReadAfterLd]>;
11071}
11072
11073multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11074                                    string OpcodeStr, X86SchedWriteWidths sched,
11075                                    Predicate prd> {
11076  let Predicates = [prd] in
11077    defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11078                                  v8i64_info, v64i8_info>, EVEX_V512;
11079  let Predicates = [prd, HasVLX] in {
11080    defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11081                                     v4i64x_info, v32i8x_info>, EVEX_V256;
11082    defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11083                                     v2i64x_info, v16i8x_info>, EVEX_V128;
11084  }
11085}
11086
11087defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11088                                        SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11089
11090// Transforms to swizzle an immediate to enable better matching when
11091// memory operand isn't in the right place.
11092def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
11093  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11094  uint8_t Imm = N->getZExtValue();
11095  // Swap bits 1/4 and 3/6.
11096  uint8_t NewImm = Imm & 0xa5;
11097  if (Imm & 0x02) NewImm |= 0x10;
11098  if (Imm & 0x10) NewImm |= 0x02;
11099  if (Imm & 0x08) NewImm |= 0x40;
11100  if (Imm & 0x40) NewImm |= 0x08;
11101  return getI8Imm(NewImm, SDLoc(N));
11102}]>;
11103def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
11104  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11105  uint8_t Imm = N->getZExtValue();
11106  // Swap bits 2/4 and 3/5.
11107  uint8_t NewImm = Imm & 0xc3;
11108  if (Imm & 0x04) NewImm |= 0x10;
11109  if (Imm & 0x10) NewImm |= 0x04;
11110  if (Imm & 0x08) NewImm |= 0x20;
11111  if (Imm & 0x20) NewImm |= 0x08;
11112  return getI8Imm(NewImm, SDLoc(N));
11113}]>;
11114def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
11115  // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11116  uint8_t Imm = N->getZExtValue();
11117  // Swap bits 1/2 and 5/6.
11118  uint8_t NewImm = Imm & 0x99;
11119  if (Imm & 0x02) NewImm |= 0x04;
11120  if (Imm & 0x04) NewImm |= 0x02;
11121  if (Imm & 0x20) NewImm |= 0x40;
11122  if (Imm & 0x40) NewImm |= 0x20;
11123  return getI8Imm(NewImm, SDLoc(N));
11124}]>;
11125def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
11126  // Convert a VPTERNLOG immediate by moving operand 1 to the end.
11127  uint8_t Imm = N->getZExtValue();
11128  // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11129  uint8_t NewImm = Imm & 0x81;
11130  if (Imm & 0x02) NewImm |= 0x04;
11131  if (Imm & 0x04) NewImm |= 0x10;
11132  if (Imm & 0x08) NewImm |= 0x40;
11133  if (Imm & 0x10) NewImm |= 0x02;
11134  if (Imm & 0x20) NewImm |= 0x08;
11135  if (Imm & 0x40) NewImm |= 0x20;
11136  return getI8Imm(NewImm, SDLoc(N));
11137}]>;
11138def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
11139  // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11140  uint8_t Imm = N->getZExtValue();
11141  // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11142  uint8_t NewImm = Imm & 0x81;
11143  if (Imm & 0x02) NewImm |= 0x10;
11144  if (Imm & 0x04) NewImm |= 0x02;
11145  if (Imm & 0x08) NewImm |= 0x20;
11146  if (Imm & 0x10) NewImm |= 0x04;
11147  if (Imm & 0x20) NewImm |= 0x40;
11148  if (Imm & 0x40) NewImm |= 0x08;
11149  return getI8Imm(NewImm, SDLoc(N));
11150}]>;
11151
11152multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11153                          X86FoldableSchedWrite sched, X86VectorVTInfo _,
11154                          string Name>{
11155  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11156  defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11157                      (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11158                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11159                      (OpNode (_.VT _.RC:$src1),
11160                              (_.VT _.RC:$src2),
11161                              (_.VT _.RC:$src3),
11162                              (i8 imm:$src4)), 1, 1>,
11163                      AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11164  defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11165                    (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11166                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11167                    (OpNode (_.VT _.RC:$src1),
11168                            (_.VT _.RC:$src2),
11169                            (_.VT (bitconvert (_.LdFrag addr:$src3))),
11170                            (i8 imm:$src4)), 1, 0>,
11171                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11172                    Sched<[sched.Folded, ReadAfterLd]>;
11173  defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11174                    (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11175                    OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11176                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
11177                    (OpNode (_.VT _.RC:$src1),
11178                            (_.VT _.RC:$src2),
11179                            (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
11180                            (i8 imm:$src4)), 1, 0>, EVEX_B,
11181                    AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11182                    Sched<[sched.Folded, ReadAfterLd]>;
11183  }// Constraints = "$src1 = $dst"
11184
11185  // Additional patterns for matching passthru operand in other positions.
11186  def : Pat<(_.VT (vselect _.KRCWM:$mask,
11187                   (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11188                   _.RC:$src1)),
11189            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11190             _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11191  def : Pat<(_.VT (vselect _.KRCWM:$mask,
11192                   (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
11193                   _.RC:$src1)),
11194            (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11195             _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
11196
11197  // Additional patterns for matching loads in other positions.
11198  def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
11199                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
11200            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11201                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11202  def : Pat<(_.VT (OpNode _.RC:$src1,
11203                          (bitconvert (_.LdFrag addr:$src3)),
11204                          _.RC:$src2, (i8 imm:$src4))),
11205            (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11206                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11207
11208  // Additional patterns for matching zero masking with loads in other
11209  // positions.
11210  def : Pat<(_.VT (vselect _.KRCWM:$mask,
11211                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11212                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11213                   _.ImmAllZerosV)),
11214            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11215             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11216  def : Pat<(_.VT (vselect _.KRCWM:$mask,
11217                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11218                    _.RC:$src2, (i8 imm:$src4)),
11219                   _.ImmAllZerosV)),
11220            (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11221             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11222
11223  // Additional patterns for matching masked loads with different
11224  // operand orders.
11225  def : Pat<(_.VT (vselect _.KRCWM:$mask,
11226                   (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11227                    _.RC:$src2, (i8 imm:$src4)),
11228                   _.RC:$src1)),
11229            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11230             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11231  def : Pat<(_.VT (vselect _.KRCWM:$mask,
11232                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11233                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11234                   _.RC:$src1)),
11235            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11236             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11237  def : Pat<(_.VT (vselect _.KRCWM:$mask,
11238                   (OpNode _.RC:$src2, _.RC:$src1,
11239                    (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
11240                   _.RC:$src1)),
11241            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11242             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
11243  def : Pat<(_.VT (vselect _.KRCWM:$mask,
11244                   (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11245                    _.RC:$src1, (i8 imm:$src4)),
11246                   _.RC:$src1)),
11247            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11248             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
11249  def : Pat<(_.VT (vselect _.KRCWM:$mask,
11250                   (OpNode (bitconvert (_.LdFrag addr:$src3)),
11251                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
11252                   _.RC:$src1)),
11253            (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11254             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
11255
11256  // Additional patterns for matching broadcasts in other positions.
11257  def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11258                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
11259            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11260                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11261  def : Pat<(_.VT (OpNode _.RC:$src1,
11262                          (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11263                          _.RC:$src2, (i8 imm:$src4))),
11264            (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11265                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11266
11267  // Additional patterns for matching zero masking with broadcasts in other
11268  // positions.
11269  def : Pat<(_.VT (vselect _.KRCWM:$mask,
11270                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11271                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11272                   _.ImmAllZerosV)),
11273            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11274             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11275             (VPTERNLOG321_imm8 imm:$src4))>;
11276  def : Pat<(_.VT (vselect _.KRCWM:$mask,
11277                   (OpNode _.RC:$src1,
11278                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11279                    _.RC:$src2, (i8 imm:$src4)),
11280                   _.ImmAllZerosV)),
11281            (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11282             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11283             (VPTERNLOG132_imm8 imm:$src4))>;
11284
11285  // Additional patterns for matching masked broadcasts with different
11286  // operand orders.
11287  def : Pat<(_.VT (vselect _.KRCWM:$mask,
11288                   (OpNode _.RC:$src1,
11289                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11290                    _.RC:$src2, (i8 imm:$src4)),
11291                   _.RC:$src1)),
11292            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11293             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11294  def : Pat<(_.VT (vselect _.KRCWM:$mask,
11295                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11296                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11297                   _.RC:$src1)),
11298            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11299             _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11300  def : Pat<(_.VT (vselect _.KRCWM:$mask,
11301                   (OpNode _.RC:$src2, _.RC:$src1,
11302                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11303                    (i8 imm:$src4)), _.RC:$src1)),
11304            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11305             _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
11306  def : Pat<(_.VT (vselect _.KRCWM:$mask,
11307                   (OpNode _.RC:$src2,
11308                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11309                    _.RC:$src1, (i8 imm:$src4)),
11310                   _.RC:$src1)),
11311            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11312             _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
11313  def : Pat<(_.VT (vselect _.KRCWM:$mask,
11314                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
11315                    _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
11316                   _.RC:$src1)),
11317            (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11318             _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
11319}
11320
11321multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11322                                 AVX512VLVectorVTInfo _> {
11323  let Predicates = [HasAVX512] in
11324    defm Z    : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11325                               _.info512, NAME>, EVEX_V512;
11326  let Predicates = [HasAVX512, HasVLX] in {
11327    defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11328                               _.info128, NAME>, EVEX_V128;
11329    defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11330                               _.info256, NAME>, EVEX_V256;
11331  }
11332}
11333
11334defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11335                                        avx512vl_i32_info>;
11336defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11337                                        avx512vl_i64_info>, VEX_W;
11338
11339// Patterns to implement vnot using vpternlog instead of creating all ones
11340// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11341// so that the result is only dependent on src0. But we use the same source
11342// for all operands to prevent a false dependency.
11343// TODO: We should maybe have a more generalized algorithm for folding to
11344// vpternlog.
11345let Predicates = [HasAVX512] in {
11346  def : Pat<(v8i64 (xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV)))),
11347            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11348}
11349
11350let Predicates = [HasAVX512, NoVLX] in {
11351  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
11352            (EXTRACT_SUBREG
11353             (VPTERNLOGQZrri
11354              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11355              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11356              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11357              (i8 15)), sub_xmm)>;
11358  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
11359            (EXTRACT_SUBREG
11360             (VPTERNLOGQZrri
11361              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11362              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11363              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11364              (i8 15)), sub_ymm)>;
11365}
11366
11367let Predicates = [HasVLX] in {
11368  def : Pat<(v2i64 (xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV)))),
11369            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11370  def : Pat<(v4i64 (xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV)))),
11371            (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11372}
11373
11374//===----------------------------------------------------------------------===//
11375// AVX-512 - FixupImm
11376//===----------------------------------------------------------------------===//
11377
// Packed VFIXUPIMM: register (rri), memory (rmi) and broadcast-memory (rmbi)
// forms.  $src1 is tied to $dst; TblVT describes the integer "table" third
// operand.  All forms use the current rounding mode (FROUND_CURRENT).
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    // Register-register form.
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                         OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (TblVT.VT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_CURRENT))>, Sched<[sched]>;
    // Full-vector load of the table operand.
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>,
                      Sched<[sched.Folded, ReadAfterLd]>;
    // Scalar load broadcast to all elements of the table operand (EVEX_B).
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                    OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                    "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>,
                    EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
  } // Constraints = "$src1 = $dst"
}
11411
// Packed VFIXUPIMM with {sae} (suppress-all-exceptions): register-register
// only, selected via FROUND_NO_EXC.  EVEX_B encodes the {sae} modifier on
// register forms.
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>{
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (TblVT.VT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
  }
}
11428
// Scalar VFIXUPIMM{SS,SD}: register (rri), register with {sae} (rrib) and
// memory (rmi) forms.  $src1 is tied to $dst; _src3VT describes the integer
// "table" third operand (v4i32 for SS, v2i64 for SD per the instantiations
// below).
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>, Sched<[sched]>;
    // The {sae} form is register-register (MRMSrcReg), so it takes the plain
    // scheduling class.  Fix: this previously used the folded-load class
    // Sched<[sched.Folded, ReadAfterLd]>, which is only correct for memory
    // forms.
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                     (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                     OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                     (OpNode (_.VT _.RC:$src1),
                             (_.VT _.RC:$src2),
                             (_src3VT.VT (scalar_to_vector
                                       (_src3VT.ScalarLdFrag addr:$src3))),
                             (i32 imm:$src4),
                             (i32 FROUND_CURRENT))>,
                     Sched<[sched.Folded, ReadAfterLd]>;
  }
}
11464
// Instantiate packed VFIXUPIMM across the three vector widths: 512-bit
// (plus the {sae} variant) under AVX512F, 128/256-bit additionally under VLX.
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
    defm Z    : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
                                       _Vec.info512, _Tbl.info512>,
                avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
                                _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
                                EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
    defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.XMM,
                            _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V128;
    defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.YMM,
                            _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
                            EVEX_4V, EVEX_V256;
  }
}

// Scalar and packed VFIXUPIMM instantiations.  The scalar forms take an
// integer table operand of the matching element width (v4i32 / v2i64).
defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                           SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
                                           SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
                          AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
                         avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
                         avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
11494
11495// Patterns used to select SSE scalar fp arithmetic instructions from
11496// either:
11497//
11498// (1) a scalar fp operation followed by a blend
11499//
11500// The effect is that the backend no longer emits unnecessary vector
11501// insert instructions immediately after SSE scalar fp instructions
11502// like addss or mulss.
11503//
11504// For example, given the following code:
11505//   __m128 foo(__m128 A, __m128 B) {
11506//     A[0] += B[0];
11507//     return A;
11508//   }
11509//
11510// Previously we generated:
11511//   addss %xmm0, %xmm1
11512//   movss %xmm1, %xmm0
11513//
11514// We now generate:
11515//   addss %xmm1, %xmm0
11516//
11517// (2) a vector packed single/double fp operation followed by a vector insert
11518//
11519// The effect is that the backend converts the packed fp instruction
11520// followed by a vector insert into a single SSE scalar fp instruction.
11521//
11522// For example, given the following code:
11523//   __m128 foo(__m128 A, __m128 B) {
11524//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
11526//   }
11527//
11528// Previously we generated:
11529//   addps %xmm0, %xmm1
11530//   movss %xmm1, %xmm0
11531//
11532// We now generate:
11533//   addss %xmm1, %xmm0
11534
11535// TODO: Some canonicalization in lowering would simplify the number of
11536// patterns we have to try to match.
// Map (extract element 0 -> scalar fp op -> re-insert via movss/movsd) onto
// the AVX-512 scalar-math *_Int instructions, including the merge-masked
// (Intk) and zero-masked (Intkz) variants.
multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
                                           X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;

    // extracted zero-masked scalar math op with insert via movss.
    // Fix: use !cast<Instruction> for consistency with the two patterns
    // above (was !cast<I>).
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2), (_.EltVT ZeroFP)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Intkz)
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
  }
}
11574
// Instantiate the scalar-math selection patterns for the four binary fp ops,
// in both single (movss/v4f32) and double (movsd/v2f64) flavors.
defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;

defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
11584
// Map "move of (unary op on extracted element 0)" onto the *_Int form of the
// corresponding AVX-512 scalar instruction (no immediate operand).
multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
  }
}

defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
11596
// Same as AVX512_scalar_unary_math_patterns, but for instructions that take a
// rounding-control immediate (VRNDSCALE): ImmV selects the rounding mode.
multiclass AVX512_scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix,
                                                 SDNode Move, X86VectorVTInfo _,
                                                 bits<8> ImmV> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src,
                                                        (i32 ImmV))>;
  }
}

// imm 0x01 = round toward -inf (floor), imm 0x02 = round toward +inf (ceil).
defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESS", X86Movss,
                                             v4f32x_info, 0x01>;
defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESS", X86Movss,
                                             v4f32x_info, 0x02>;
defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESD", X86Movsd,
                                             v2f64x_info, 0x01>;
defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESD", X86Movsd,
                                             v2f64x_info, 0x02>;
11616
11617//===----------------------------------------------------------------------===//
11618// AES instructions
11619//===----------------------------------------------------------------------===//
11620
// EVEX-encoded AES (VAES): reuses the SSE AESI_binop_rm_int class.  The
// 128/256-bit forms require VAES+VLX; the 512-bit form requires VAES with
// AVX512F.  IntPrefix names the base intrinsic; "_256"/"_512" suffixes
// select the wider variants.
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
    }
    let Predicates = [HasAVX512, HasVAES] in
    defm Z    : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_512"),
                                  loadv8i64, 0, VR512, i512mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}

defm VAESENC      : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
defm VAESENCLAST  : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
defm VAESDEC      : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
defm VAESDECLAST  : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
11643
11644//===----------------------------------------------------------------------===//
11645// PCLMUL instructions - Carry less multiplication
11646//===----------------------------------------------------------------------===//
11647
// EVEX-encoded carry-less multiply.  512-bit form under VPCLMULQDQ+AVX512F;
// 128/256-bit forms under VPCLMULQDQ+VLX.
let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                              EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                              EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}

// Aliases (e.g. vpclmullqlqdq and friends) for each width.
defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
11665
11666//===----------------------------------------------------------------------===//
11667// VBMI2
11668//===----------------------------------------------------------------------===//
11669
// VBMI2 variable funnel shift (VPSHLDV/VPSHRDV): register and full-vector
// memory forms.  $src1 is tied to $dst.
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in {
    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
                AVX512FMA3Base, Sched<[sched]>;
    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                "$src3, $src2", "$src2, $src3",
                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                        (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>,
                AVX512FMA3Base,
                Sched<[sched.Folded, ReadAfterLd]>;
  }
}
11688
// Extends VBMI2_shift_var_rm with the broadcast-memory (EVEX_B) form for
// element types that support embedded broadcast (32/64-bit).
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in
  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
              "${src3}"##VTI.BroadcastStr##", $src2",
              "$src2, ${src3}"##VTI.BroadcastStr,
              (OpNode VTI.RC:$src1, VTI.RC:$src2,
               (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
              AVX512FMA3Base, EVEX_B,
              Sched<[sched.Folded, ReadAfterLd]>;
}
11703
// Instantiate the non-broadcast variable-shift forms across vector widths:
// 512-bit under VBMI2, 128/256-bit additionally under VLX.
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                   EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                   EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                   EVEX_V128;
  }
}
11716
// Same as VBMI2_shift_var_rm_common, but using the broadcast-capable forms.
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                                    EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                                    EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                                    EVEX_V128;
  }
}
// Word/dword/qword variants of a variable funnel shift.  The W form uses the
// non-broadcast common class because EVEX embedded broadcast does not exist
// for 16-bit elements; D and Q get the broadcast forms.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}
11738
// Word/dword/qword variants of an immediate funnel shift (VPSHLD/VPSHRD).
// As above, W has no broadcast form; D and Q use the broadcast-capable
// immediate class.
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
             VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}
11749
// Concat & Shift (funnel shifts by variable amount and by immediate).
defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
defm VPSHLD  : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
defm VPSHRD  : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;

// Compress (byte/word element compress; both share opcode 0x63, W selects
// word).
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         NotMemoryFoldable;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                          NotMemoryFoldable;
// Expand
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
11768
11769//===----------------------------------------------------------------------===//
11770// VNNI
11771//===----------------------------------------------------------------------===//
11772
// VNNI dot-product accumulate: register, memory, and broadcast-memory forms.
// $src1 is the accumulator, tied to $dst; broadcast granularity is 32-bit
// (EVEX_CD8<32, CD8VF>).
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1,
                                            VTI.RC:$src2, VTI.RC:$src3))>,
                                   EVEX_4V, T8PD, Sched<[sched]>;
  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                            (VTI.VT (bitconvert
                                                     (VTI.LdFrag addr:$src3)))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                   Sched<[sched.Folded, ReadAfterLd]>;
  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                   OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
                                   "$src2, ${src3}"##VTI.BroadcastStr,
                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                    (VTI.VT (X86VBroadcast
                                             (VTI.ScalarLdFrag addr:$src3))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                   T8PD, Sched<[sched.Folded, ReadAfterLd]>;
}
11800
// Instantiate a VNNI op across widths: 512-bit under VNNI, 128/256-bit
// additionally under VLX.  Element type is always i32.
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched> {
  let Predicates = [HasVNNI] in
  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info>, EVEX_V256;
    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPDP?
defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>;
defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>;
defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>;
defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>;
11816
11817//===----------------------------------------------------------------------===//
11818// Bit Algorithms
11819//===----------------------------------------------------------------------===//
11820
// BITALG byte/word population count, plus the generic lowering patterns that
// select them for ISD::CTPOP.
// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
                                   avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
                                   avx512vl_i16_info, HasBITALG>, VEX_W;

defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
11829
// VPSHUFBITQMB: bit-shuffle producing a mask-register result (note the
// KRC destination), in register and memory forms.
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>,
                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                Sched<[sched.Folded, ReadAfterLd]>;
}
11847
// Instantiate VPSHUFBITQMB across widths: 512-bit under BITALG, 128/256-bit
// additionally under VLX.
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}

// FIXME: Is there a better scheduler class for VPSHUFBITQMB?
defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
11859
11860//===----------------------------------------------------------------------===//
11861// GFNI
11862//===----------------------------------------------------------------------===//
11863
// GFNI byte multiply (VGF2P8MULB), EVEX-encoded.  All widths additionally
// require BWI for the byte-granular writemask.
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
                                EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                                EVEX_V128;
  }
}

defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;
11880
// GFNI affine transform with immediate: inherits reg/mem forms from
// avx512_3Op_rm_imm8 and adds the broadcast-memory form, which broadcasts a
// qword (loadi64) since the matrix operand is qword-granular (BcstVTI).
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
                 (i8 imm:$src3))>, EVEX_B,
                 Sched<[sched.Folded, ReadAfterLd]>;
}
11895
// Instantiate the GFNI affine ops across widths; byte data with qword
// broadcast info at each width.
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                           v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}

defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
                         X86GF2P8affineinvqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
                         X86GF2P8affineqb, SchedWriteVecIMul>,
                         EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
11915
11916
11917//===----------------------------------------------------------------------===//
11918// AVX5124FMAPS
11919//===----------------------------------------------------------------------===//
11920
// AVX5124FMAPS (v4fmaddps etc.): defined as assembler-only (empty patterns,
// hasSideEffects = 0, mayLoad = 1) with $src1 tied to $dst.  The memory
// operand is a 128-bit block (f128mem; CD8VQ tuple on the packed forms) —
// presumably covering the 4-register source group; selection happens
// elsewhere.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst" in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins  VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}
11947
11948//===----------------------------------------------------------------------===//
11949// AVX5124VNNIW
11950//===----------------------------------------------------------------------===//
11951
// AVX5124VNNIW (vp4dpwssd/vp4dpwssds): assembler-only definitions, mirroring
// the 4FMAPS block above — empty patterns, mayLoad, $src1 tied to $dst, and
// a 128-bit memory operand with a CD8VQ tuple.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
    Constraints = "$src1 = $dst" in {
defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssd", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "vp4dpwssds", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;
}
11966
11967