//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
  InstrItinClass rr = arg_rr;
  InstrItinClass rm = arg_rm;
}

class SizeItins<OpndItins arg_s, OpndItins arg_d> {
  OpndItins s = arg_s;
  OpndItins d = arg_d;
}


class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
  InstrItinClass arg_ri> {
  InstrItinClass rr = arg_rr;
  InstrItinClass rm = arg_rm;
  InstrItinClass ri = arg_ri;
}


// scalar
def SSE_ALU_F32S : OpndItins<
  IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
>;

def SSE_ALU_F64S : OpndItins<
  IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
>;

def SSE_ALU_ITINS_S : SizeItins<
  SSE_ALU_F32S, SSE_ALU_F64S
>;

def SSE_MUL_F32S : OpndItins<
  IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F32S_RM
>;

def SSE_MUL_F64S : OpndItins<
  IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
>;

def SSE_MUL_ITINS_S : SizeItins<
  SSE_MUL_F32S, SSE_MUL_F64S
>;

def SSE_DIV_F32S : OpndItins<
  IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F32S_RM
>;

def SSE_DIV_F64S : OpndItins<
  IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
>;

def SSE_DIV_ITINS_S : SizeItins<
  SSE_DIV_F32S, SSE_DIV_F64S
>;

// parallel
def SSE_ALU_F32P : OpndItins<
  IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
>;

def SSE_ALU_F64P : OpndItins<
  IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM
>;

def SSE_ALU_ITINS_P : SizeItins<
  SSE_ALU_F32P, SSE_ALU_F64P
>;

def SSE_MUL_F32P : OpndItins<
  IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F32P_RM
>;

def SSE_MUL_F64P : OpndItins<
  IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
>;

def SSE_MUL_ITINS_P : SizeItins<
  SSE_MUL_F32P, SSE_MUL_F64P
>;

def SSE_DIV_F32P : OpndItins<
  IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F32P_RM
>;

def SSE_DIV_F64P : OpndItins<
  IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
>;

def SSE_DIV_ITINS_P : SizeItins<
  SSE_DIV_F32P, SSE_DIV_F64P
>;

def SSE_BIT_ITINS_P : OpndItins<
  IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
>;

def SSE_INTALU_ITINS_P : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;

def SSE_INTALUQ_ITINS_P : OpndItins<
  IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
>;

def SSE_INTMUL_ITINS_P : OpndItins<
  IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
>;

def SSE_INTSHIFT_ITINS_P : ShiftOpndItins<
  IIC_SSE_INTSH_P_RR, IIC_SSE_INTSH_P_RM, IIC_SSE_INTSH_P_RI
>;

def SSE_MOVA_ITINS : OpndItins<
  IIC_SSE_MOVA_P_RR, IIC_SSE_MOVA_P_RM
>;

def SSE_MOVU_ITINS : OpndItins<
  IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM
>;
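
// Note (illustrative, derived from the definitions above): the multiclasses
// below consume these bundles field by field, using itins.rr for the
// register-register form and itins.rm for the register-memory form, with a
// SizeItins selecting the f32 (.s) or f64 (.d) bundle first.  For example:
//   SSE_ALU_ITINS_S.s.rr is IIC_SSE_ALU_F32S_RR
//   SSE_ALU_ITINS_S.d.rm is IIC_SSE_ALU_F64S_RM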

//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//

/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, X86MemOperand x86memop,
                           OpndItins itins,
                           bit Is2Addr = 1> {
  let isCommutable = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>;
  }
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>;
}
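
// Illustrative, hypothetical instantiation of sse12_fp_scalar (the name here
// is an example only, not a definition used elsewhere): expanding
//   defm EXAMPLE_SS : sse12_fp_scalar<0x58, "addss", fadd, FR32, f32mem,
//                                     SSE_ALU_F32S>, XS;
// would produce an EXAMPLE_SSrr/EXAMPLE_SSrm pair selecting fadd on FR32,
// with the rr form marked commutable and the rm form folding the load.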

/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
                             string asm, string SSEVer, string FPSizeStr,
                             Operand memopr, ComplexPattern mem_cpat,
                             OpndItins itins,
                             bit Is2Addr = 1> {
  def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(
                 !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
             RC:$src1, RC:$src2))], itins.rr>;
  def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
                                          SSEVer, "_", OpcodeStr, FPSizeStr))
             RC:$src1, mem_cpat:$src2))], itins.rm>;
}
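
// Worked example of the intrinsic-name construction above (illustrative
// parameter values): with SSEVer "2", OpcodeStr "add" and FPSizeStr "_sd",
// the !strconcat yields "int_x86_sse2_add_sd", which !cast<Intrinsic>
// resolves to the record for llvm.x86.sse2.add.sd.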

/// sse12_fp_packed - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           Domain d, OpndItins itins, bit Is2Addr = 1> {
  let isCommutable = 1 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>;
  let mayLoad = 1 in
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
          itins.rm, d>;
}
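
// Illustrative, hypothetical instantiation of sse12_fp_packed, analogous to
// the scalar example above (name and opcode are examples only):
//   defm EXAMPLE_PS : sse12_fp_packed<0x58, "addps", fadd, VR128, v4f32,
//                                     f128mem, memopv4f32, SSEPackedSingle,
//                                     SSE_ALU_F32P>, TB;
// would produce an rr/rm pair selecting a packed v4f32 fadd in the
// single-precision execution domain.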

/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
                                      string OpcodeStr, X86MemOperand x86memop,
                                      list<dag> pat_rr, list<dag> pat_rm,
                                      bit Is2Addr = 1,
                                      bit rr_hasSideEffects = 0> {
  let isCommutable = 1, neverHasSideEffects = rr_hasSideEffects in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rr, IIC_DEFAULT, d>;
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rm, IIC_DEFAULT, d>;
}

/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
                           string asm, string SSEVer, string FPSizeStr,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           Domain d, OpndItins itins, bit Is2Addr = 1> {
  def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
           [(set RC:$dst, (!cast<Intrinsic>(
                     !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
                 RC:$src1, RC:$src2))], IIC_DEFAULT, d>;
  def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(
                     !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
             RC:$src1, (mem_frag addr:$src2)))], IIC_DEFAULT, d>;
}
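
// Worked example of the packed intrinsic-name construction (illustrative
// parameter values): with SSEVer "sse2", OpcodeStr "max" and FPSizeStr "_pd",
// the !strconcat yields "int_x86_sse2_max_pd", i.e. llvm.x86.sse2.max.pd.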

//===----------------------------------------------------------------------===//
//  Non-instruction patterns
//===----------------------------------------------------------------------===//

// A vector extract of the first f32/f64 position is a subregister copy
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
          (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
          (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;

// A 128-bit subvector extract from the first 256-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (i32 0))),
          (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (i32 0))),
          (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>;

def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (i32 0))),
          (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (i32 0))),
          (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>;

def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (i32 0))),
          (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>;
def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (i32 0))),
          (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>;

// A 128-bit subvector insert to the first 256-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (i32 0)),
          (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (i32 0)),
          (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (i32 0)),
          (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (i32 0)),
          (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (i32 0)),
          (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)),
          (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;

// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
          (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
          (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;

// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasSSE2] in {
  def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
}

// Bitcasts between 256-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasAVX] in {
  def : Pat<(v4f64  (bitconvert (v8f32 VR256:$src))),  (v4f64 VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v8i32 VR256:$src))),  (v4f64 VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v4i64 VR256:$src))),  (v4f64 VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v32i8 VR256:$src))),  (v4f64 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v8i32 VR256:$src))),  (v8f32 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v4i64 VR256:$src))),  (v8f32 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v4f64 VR256:$src))),  (v8f32 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v32i8 VR256:$src))),  (v8f32 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v8f32 VR256:$src))),  (v4i64 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v8i32 VR256:$src))),  (v4i64 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v4f64 VR256:$src))),  (v4i64 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v32i8 VR256:$src))),  (v4i64 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v4f64 VR256:$src))),  (v32i8 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v4i64 VR256:$src))),  (v32i8 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v8f32 VR256:$src))),  (v32i8 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v8i32 VR256:$src))),  (v32i8 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v32i8 VR256:$src))),  (v8i32 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v8f32 VR256:$src))),  (v8i32 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v4i64 VR256:$src))),  (v8i32 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v4f64 VR256:$src))),  (v8i32 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))),  (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))),  (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))),  (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))),  (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))),  (v16i16 VR256:$src)>;
}

// Alias instructions that map fld0 to pxor for sse.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1 in {
  def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                   [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
  def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
                   [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>;
}

//===----------------------------------------------------------------------===//
// AVX & SSE - Zero/One Vectors
//===----------------------------------------------------------------------===//

// Alias instruction that maps zero vector to pxor / xorp* for sse.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDepsFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, neverHasSideEffects = 1 in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", []>;
}

def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;


// The same as done above, but for AVX.  The 256-bit ISA does not support PI,
// and doesn't need it because on Sandy Bridge the register is set to zero
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty.
// FIXME: Change encoding to pseudo! This is blocked right now by the x86
// JIT implementation, which does not expand the instructions below like
// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isCodeGenOnly = 1 in {
let Predicates = [HasAVX] in {
def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
                   [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
                   [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
}
let Predicates = [HasAVX2], neverHasSideEffects = 1 in
def AVX2_SET0   : PDI<0xef, MRMInitReg, (outs VR256:$dst), (ins), "",
                   []>, VEX_4V;
}

let Predicates = [HasAVX2], AddedComplexity = 5 in {
  def : Pat<(v4i64 immAllZerosV), (AVX2_SET0)>;
  def : Pat<(v8i32 immAllZerosV), (AVX2_SET0)>;
  def : Pat<(v16i16 immAllZerosV), (AVX2_SET0)>;
  def : Pat<(v32i8 immAllZerosV), (AVX2_SET0)>;
}

// AVX has no support for 256-bit integer instructions, but since the 128-bit
// VPXOR instruction writes zero to its upper part, it's safe to use it to
// build zeros.
def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v32i8 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;

def : Pat<(v16i16 immAllZerosV), (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v16i16 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;

def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;

def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;

// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
// FIXME: Change encoding to pseudo! This is blocked right now by the x86
// JIT implementation, which does not expand the instructions below like
// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isCodeGenOnly = 1, ExeDomain = SSEPackedInt in {
  let Predicates = [HasAVX] in
  def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
                         [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V;
  def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
                         [(set VR128:$dst, (v4i32 immAllOnesV))]>;
  let Predicates = [HasAVX2] in
  def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V;
}


//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
// is used instead. Register-to-register movss/movsd is not modeled as an
// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
// in terms of a copy, and, as just mentioned, we don't use movss/movsd for
// copies.
//===----------------------------------------------------------------------===//

class sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, string asm> :
      SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
      [(set VR128:$dst, (vt (OpNode VR128:$src1,
                             (scalar_to_vector RC:$src2))))],
      IIC_SSE_MOV_S_RR>;

// Loading from memory automatically zeroing upper bits.
class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
                    PatFrag mem_pat, string OpcodeStr> :
      SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                        [(set RC:$dst, (mem_pat addr:$src))],
                        IIC_SSE_MOV_S_RM>;

// AVX
def VMOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32,
                "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V,
                VEX_LIG;
def VMOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
                "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V,
                VEX_LIG;

// For the disassembler
let isCodeGenOnly = 1 in {
  def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                        (ins VR128:$src1, FR32:$src2),
                        "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
                        IIC_SSE_MOV_S_RR>,
                        XS, VEX_4V, VEX_LIG;
  def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                        (ins VR128:$src1, FR64:$src2),
                        "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
                        IIC_SSE_MOV_S_RR>,
                        XD, VEX_4V, VEX_LIG;
}

let canFoldAsLoad = 1, isReMaterializable = 1 in {
  def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX,
                 VEX_LIG;
  let AddedComplexity = 20 in
    def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX,
                   VEX_LIG;
}

def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
                  "movss\t{$src, $dst|$dst, $src}",
                  [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
                  XS, VEX, VEX_LIG;
def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
                  "movsd\t{$src, $dst|$dst, $src}",
                  [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
                  XD, VEX, VEX_LIG;

// SSE1 & 2
let Constraints = "$src1 = $dst" in {
  def MOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32,
                          "movss\t{$src2, $dst|$dst, $src2}">, XS;
  def MOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
                          "movsd\t{$src2, $dst|$dst, $src2}">, XD;

  // For the disassembler
  let isCodeGenOnly = 1 in {
    def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                         (ins VR128:$src1, FR32:$src2),
                         "movss\t{$src2, $dst|$dst, $src2}", [],
                         IIC_SSE_MOV_S_RR>, XS;
    def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                         (ins VR128:$src1, FR64:$src2),
                         "movsd\t{$src2, $dst|$dst, $src2}", [],
                         IIC_SSE_MOV_S_RR>, XD;
  }
}

let canFoldAsLoad = 1, isReMaterializable = 1 in {
  def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;

  let AddedComplexity = 20 in
    def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
}

def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
                  "movss\t{$src, $dst|$dst, $src}",
                  [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
                  "movsd\t{$src, $dst|$dst, $src}",
                  [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;

// Patterns
let Predicates = [HasAVX] in {
  let AddedComplexity = 15 in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVS{S,D} to the lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
            (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VMOVSSrr (v4f32 (V_SET0)),
                      (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VMOVSSrr (v4i32 (V_SET0)),
                      (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
            (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
              (VMOVSSrr (v4f32 (V_SET0)),
                        (EXTRACT_SUBREG (v8f32 VR256:$src), sub_ss)), sub_xmm)>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
              (VMOVSSrr (v4i32 (V_SET0)),
                        (EXTRACT_SUBREG (v8i32 VR256:$src), sub_ss)), sub_xmm)>;
  }

  let AddedComplexity = 20 in {
  // MOVSSrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;

  // MOVSDrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;

  // Represent the same patterns above but in the form they appear for
  // 256-bit types
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>;
  }
  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))),
            (SUBREG_TO_REG (i32 0),
                           (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
                           sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector FR64:$src)), (i32 0)))),
            (SUBREG_TO_REG (i64 0),
                           (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
                           sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                   (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))),
            (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;

  // Move low f64 and clear high bits.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
              (VMOVSDrr (v2f64 (V_SET0)),
                        (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>;

  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
            (SUBREG_TO_REG (i32 0),
              (VMOVSDrr (v2i64 (V_SET0)),
                        (EXTRACT_SUBREG (v4i64 VR256:$src), sub_sd)), sub_xmm)>;

  // Extract and store.
  def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSSmr addr:$dst,
                     (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
  def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSDmr addr:$dst,
                     (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;

  // Shuffle with VMOVSS
  def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
            (VMOVSSrr (v4i32 VR128:$src1),
                      (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (VMOVSSrr (v4f32 VR128:$src1),
                      (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;

  // 256-bit variants
  def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)),
            (SUBREG_TO_REG (i32 0),
                (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss),
                          (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>;
  def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)),
            (SUBREG_TO_REG (i32 0),
                (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss),
                          (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>;

  // Shuffle with VMOVSD
  def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr (v2i64 VR128:$src1),
                     (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr (v2f64 VR128:$src1),
                     (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
  def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
                                                   sub_sd))>;
  def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
                                                   sub_sd))>;

  // 256-bit variants
  def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
            (SUBREG_TO_REG (i32 0),
                (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd),
                          (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>;
  def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
            (SUBREG_TO_REG (i32 0),
                (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd),
                          (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>;


  // FIXME: Instead of an X86Movlps there should be an X86Movsd here; the
  // problem is during lowering, where it's not possible to recognize the fold
  // because it has two uses through a bitcast. One use disappears at isel time
  // and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),
                                                   sub_sd))>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),
                                                   sub_sd))>;
  def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
                                                   sub_sd))>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
                                                   sub_sd))>;
}

let Predicates = [HasSSE1] in {
  let AddedComplexity = 15 in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVSS to the lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
            (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (MOVSSrr (v4f32 (V_SET0)),
                     (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (MOVSSrr (v4i32 (V_SET0)),
                     (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
  }

  let AddedComplexity = 20 in {
  // MOVSSrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG.
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
            (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
            (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
  def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
            (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
  }

  // Extract and store.
  def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (MOVSSmr addr:$dst,
                     (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;

  // Shuffle with MOVSS
  def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
            (MOVSSrr (v4i32 VR128:$src1),
                     (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (MOVSSrr (v4f32 VR128:$src1),
                     (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
}

let Predicates = [HasSSE2] in {
  let AddedComplexity = 15 in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVSD to the lower bits.
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
            (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
  }

  let AddedComplexity = 20 in {
  // MOVSDrm zeros the high parts of the register; represent this
  // with SUBREG_TO_REG.
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
  def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
  def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
  def : Pat<(v2f64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
  }

  // Extract and store.
  def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (MOVSDmr addr:$dst,
                     (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;

  // Shuffle with MOVSD
  def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
            (MOVSDrr (v2i64 VR128:$src1),
                     (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (MOVSDrr (v2f64 VR128:$src1),
                     (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
  def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
  def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;

  // FIXME: Instead of an X86Movlps there should be an X86Movsd here; the
  // problem is during lowering, where it's not possible to recognize the fold
  // because it has two uses through a bitcast. One use disappears at isel time
  // and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>;
  def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//

multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag ld_frag,
                            string asm, Domain d,
                            OpndItins itins,
                            bit IsReMaterializable = 1> {
let neverHasSideEffects = 1 in
  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>;
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                   [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>;
}

defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
                              "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
                              TB, VEX;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
                              "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
                              TB, OpSize, VEX;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
                              "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
                              TB, VEX;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
                              "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
                              TB, OpSize, VEX;

defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
                              "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
                              TB, VEX;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
                              "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
                              TB, OpSize, VEX;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
                              "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
                              TB, VEX;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
                              "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
                              TB, OpSize, VEX;
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
                              "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
                              TB;
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
                              "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
                              TB, OpSize;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
                              "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
                              TB;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
                              "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
                              TB, OpSize;

def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>, VEX;
def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>, VEX;
def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>, VEX;
def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>, VEX;
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore256 (v8f32 VR256:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>, VEX;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore256 (v4f64 VR256:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>, VEX;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v8f32 VR256:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>, VEX;
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v4f64 VR256:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>, VEX;

// For disassembler
let isCodeGenOnly = 1 in {
  def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
                          (ins VR128:$src),
                          "movaps\t{$src, $dst|$dst, $src}", [],
                          IIC_SSE_MOVA_P_RR>, VEX;
  def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movapd\t{$src, $dst|$dst, $src}", [],
                           IIC_SSE_MOVA_P_RR>, VEX;
  def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movups\t{$src, $dst|$dst, $src}", [],
                           IIC_SSE_MOVU_P_RR>, VEX;
  def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movupd\t{$src, $dst|$dst, $src}", [],
                           IIC_SSE_MOVU_P_RR>, VEX;
  def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movaps\t{$src, $dst|$dst, $src}", [],
                            IIC_SSE_MOVA_P_RR>, VEX;
  def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movapd\t{$src, $dst|$dst, $src}", [],
                            IIC_SSE_MOVA_P_RR>, VEX;
  def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movups\t{$src, $dst|$dst, $src}", [],
                            IIC_SSE_MOVU_P_RR>, VEX;
  def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movupd\t{$src, $dst|$dst, $src}", [],
                            IIC_SSE_MOVU_P_RR>, VEX;
}

let Predicates = [HasAVX] in {
def : Pat<(v8i32 (X86vzmovl
                        (insert_subvector undef, (v4i32 VR128:$src), (i32 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl
                        (insert_subvector undef, (v2i64 VR128:$src), (i32 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl
                        (insert_subvector undef, (v4f32 VR128:$src), (i32 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl
                        (insert_subvector undef, (v2f64 VR128:$src), (i32 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
}


def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
          (VMOVUPSYmr addr:$dst, VR256:$src)>;
def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
          (VMOVUPDYmr addr:$dst, VR256:$src)>;

def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>;

// For disassembler
let isCodeGenOnly = 1 in {
  def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movaps\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVA_P_RR>;
  def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movapd\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVA_P_RR>;
  def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movups\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVU_P_RR>;
  def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movupd\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVU_P_RR>;
}

let Predicates = [HasAVX] in {
  def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
            (VMOVUPDmr addr:$dst, VR128:$src)>;
}

let Predicates = [HasSSE1] in
  def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
            (MOVUPSmr addr:$dst, VR128:$src)>;
let Predicates = [HasSSE2] in
  def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
            (MOVUPDmr addr:$dst, VR128:$src)>;

// Use vmovaps/vmovups for AVX integer load/store.
let Predicates = [HasAVX] in {
  // 128-bit load/store
  def : Pat<(alignedloadv2i64 addr:$src),
            (VMOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (VMOVUPSrm addr:$src)>;

  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;

  // 256-bit load/store
  def : Pat<(alignedloadv4i64 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(loadv4i64 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v4i64 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v8i32 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
}

// Use movaps / movups for SSE integer load / store (one byte shorter).
// The instructions selected below are then converted to MOVDQA/MOVDQU
// during the SSE domain pass.
let Predicates = [HasSSE1] in {
  def : Pat<(alignedloadv2i64 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (MOVUPSrm addr:$src)>;

  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}
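
// Illustrative effect of the patterns above (assuming an aligned v4i32 load):
// isel picks MOVAPSrm for the shorter encoding, and the execution-domain fix
// pass may later rewrite it to MOVDQA when the surrounding code is integer.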
1077
1078// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
1079// bits are disregarded. FIXME: Set encoding to pseudo!
1080let neverHasSideEffects = 1 in {
1081def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
1082                       "movaps\t{$src, $dst|$dst, $src}", [],
1083                       IIC_SSE_MOVA_P_RR>, VEX;
1084def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
1085                       "movapd\t{$src, $dst|$dst, $src}", [],
1086                       IIC_SSE_MOVA_P_RR>, VEX;
1087def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
1088                     "movaps\t{$src, $dst|$dst, $src}", [],
1089                     IIC_SSE_MOVA_P_RR>;
1090def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
1091                     "movapd\t{$src, $dst|$dst, $src}", [],
1092                     IIC_SSE_MOVA_P_RR>;
1093}
1094
1095// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
1096// bits are disregarded. FIXME: Set encoding to pseudo!
1097let canFoldAsLoad = 1, isReMaterializable = 1 in {
1098let isCodeGenOnly = 1 in {
1099  def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
1100                         "movaps\t{$src, $dst|$dst, $src}",
1101                         [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
1102                         IIC_SSE_MOVA_P_RM>, VEX;
1103  def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
1104                         "movapd\t{$src, $dst|$dst, $src}",
1105                         [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
1106                         IIC_SSE_MOVA_P_RM>, VEX;
1107}
1108def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
1109                     "movaps\t{$src, $dst|$dst, $src}",
1110                     [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
1111                     IIC_SSE_MOVA_P_RM>;
1112def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
1113                     "movapd\t{$src, $dst|$dst, $src}",
1114                     [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
1115                     IIC_SSE_MOVA_P_RM>;
1116}
1117
1118//===----------------------------------------------------------------------===//
1119// SSE 1 & 2 - Move Low packed FP Instructions
1120//===----------------------------------------------------------------------===//
1121
1122multiclass sse12_mov_hilo_packed<bits<8> opc, RegisterClass RC,
1123                                 SDNode psnode, SDNode pdnode, string base_opc,
1124                                 string asm_opr, InstrItinClass itin> {
1125  def PSrm : PI<opc, MRMSrcMem,
1126         (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
1127         !strconcat(base_opc, "s", asm_opr),
1128     [(set RC:$dst,
1129       (psnode RC:$src1,
1130              (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
1131              itin, SSEPackedSingle>, TB;
1132
1133  def PDrm : PI<opc, MRMSrcMem,
1134         (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
1135         !strconcat(base_opc, "d", asm_opr),
1136     [(set RC:$dst, (v2f64 (pdnode RC:$src1,
1137                              (scalar_to_vector (loadf64 addr:$src2)))))],
1138              itin, SSEPackedDouble>, TB, OpSize;
1139}
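
// Illustrative sketch of what a defm of this multiclass produces, using the
// MOVL/VMOVL instantiations below as the example (names are formed by
// appending PSrm/PDrm to the defm name):
//
//   MOVLPSrm   "movlps ..."   folds a 64-bit load into the low half of $dst
//                             via the X86Movlps node
//   MOVLPDrm   "movlpd ..."   the same for doubles via X86Movlpd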
1140
1141let AddedComplexity = 20 in {
1142  defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
1143                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1144                     IIC_SSE_MOV_LH>, VEX_4V;
1145}
1146let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
1147  defm MOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
1148                                   "\t{$src2, $dst|$dst, $src2}",
1149                                   IIC_SSE_MOV_LH>;
1150}
1151
1152def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
1153                   "movlps\t{$src, $dst|$dst, $src}",
1154                   [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
1155                                 (iPTR 0))), addr:$dst)],
1156                                 IIC_SSE_MOV_LH>, VEX;
1157def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
1158                   "movlpd\t{$src, $dst|$dst, $src}",
1159                   [(store (f64 (vector_extract (v2f64 VR128:$src),
1160                                 (iPTR 0))), addr:$dst)],
1161                                 IIC_SSE_MOV_LH>, VEX;
1162def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
1163                   "movlps\t{$src, $dst|$dst, $src}",
1164                   [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
1165                                 (iPTR 0))), addr:$dst)],
1166                                 IIC_SSE_MOV_LH>;
1167def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
1168                   "movlpd\t{$src, $dst|$dst, $src}",
1169                   [(store (f64 (vector_extract (v2f64 VR128:$src),
1170                                 (iPTR 0))), addr:$dst)],
1171                                 IIC_SSE_MOV_LH>;
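
// Illustrative examples of the load and store halves defined above (AT&T
// syntax, registers arbitrary):
//
//   movlps (%rdi), %xmm0    replace the low 64 bits of %xmm0 with the load
//   movlps %xmm0, (%rdi)    store the low 64 bits of %xmm0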
1172
1173let Predicates = [HasAVX] in {
1174  // Shuffle with VMOVLPS
1175  def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
1176            (VMOVLPSrm VR128:$src1, addr:$src2)>;
1177  def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
1178            (VMOVLPSrm VR128:$src1, addr:$src2)>;
1179
1180  // Shuffle with VMOVLPD
1181  def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
1182            (VMOVLPDrm VR128:$src1, addr:$src2)>;
1183  def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
1184            (VMOVLPDrm VR128:$src1, addr:$src2)>;
1185
1186  // Store patterns
1187  def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
1188                   addr:$src1),
1189            (VMOVLPSmr addr:$src1, VR128:$src2)>;
1190  def : Pat<(store (v4i32 (X86Movlps
1191                   (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
1192            (VMOVLPSmr addr:$src1, VR128:$src2)>;
1193  def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
1194                   addr:$src1),
1195            (VMOVLPDmr addr:$src1, VR128:$src2)>;
1196  def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
1197                   addr:$src1),
1198            (VMOVLPDmr addr:$src1, VR128:$src2)>;
1199}
1200
1201let Predicates = [HasSSE1] in {
1202  // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
1203  def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)),
1204                                 (iPTR 0))), addr:$src1),
1205            (MOVLPSmr addr:$src1, VR128:$src2)>;
1206
1207  // Shuffle with MOVLPS
1208  def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
1209            (MOVLPSrm VR128:$src1, addr:$src2)>;
1210  def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
1211            (MOVLPSrm VR128:$src1, addr:$src2)>;
1212  def : Pat<(X86Movlps VR128:$src1,
1213                      (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
1214            (MOVLPSrm VR128:$src1, addr:$src2)>;
1215
1216  // Store patterns
1217  def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
1218                                      addr:$src1),
1219            (MOVLPSmr addr:$src1, VR128:$src2)>;
1220  def : Pat<(store (v4i32 (X86Movlps
1221                   (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
1222                              addr:$src1),
1223            (MOVLPSmr addr:$src1, VR128:$src2)>;
1224}
1225
1226let Predicates = [HasSSE2] in {
1227  // Shuffle with MOVLPD
1228  def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
1229            (MOVLPDrm VR128:$src1, addr:$src2)>;
1230  def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
1231            (MOVLPDrm VR128:$src1, addr:$src2)>;
1232
1233  // Store patterns
1234  def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
1235                           addr:$src1),
1236            (MOVLPDmr addr:$src1, VR128:$src2)>;
1237  def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
1238                           addr:$src1),
1239            (MOVLPDmr addr:$src1, VR128:$src2)>;
1240}
1241
1242//===----------------------------------------------------------------------===//
1243// SSE 1 & 2 - Move High packed FP Instructions
1244//===----------------------------------------------------------------------===//
1245
1246let AddedComplexity = 20 in {
1247  defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
1248                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1249                     IIC_SSE_MOV_LH>, VEX_4V;
1250}
1251let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
1252  defm MOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
1253                                   "\t{$src2, $dst|$dst, $src2}",
1254                                   IIC_SSE_MOV_LH>;
1255}
1256
1257// v2f64 extract element 1 is always custom lowered to unpack high to low
1258// and extract element 0, so the non-store version isn't too horrible.
1259def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
1260                   "movhps\t{$src, $dst|$dst, $src}",
1261                   [(store (f64 (vector_extract
1262                                 (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
1263                                            (bc_v2f64 (v4f32 VR128:$src))),
1264                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
1265def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
1266                   "movhpd\t{$src, $dst|$dst, $src}",
1267                   [(store (f64 (vector_extract
1268                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
1269                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
1270def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
1271                   "movhps\t{$src, $dst|$dst, $src}",
1272                   [(store (f64 (vector_extract
1273                                 (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
1274                                            (bc_v2f64 (v4f32 VR128:$src))),
1275                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
1276def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
1277                   "movhpd\t{$src, $dst|$dst, $src}",
1278                   [(store (f64 (vector_extract
1279                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
1280                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
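
// Illustrative sketch: without the store patterns above, extracting and
// storing element 1 of a v2f64 would come out roughly as (AT&T syntax,
// registers arbitrary)
//
//   unpckhpd %xmm0, %xmm0      move the high element down to the low position
//   movsd    %xmm0, (%rdi)     store the low element
//
// whereas MOVHPDmr folds both steps into a single "movhpd %xmm0, (%rdi)".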
1281
1282let Predicates = [HasAVX] in {
1283  // VMOVHPS patterns
1284  def : Pat<(X86Movlhps VR128:$src1,
1285                 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
1286            (VMOVHPSrm VR128:$src1, addr:$src2)>;
1287  def : Pat<(X86Movlhps VR128:$src1,
1288                 (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
1289            (VMOVHPSrm VR128:$src1, addr:$src2)>;
1290
1291  // FIXME: Instead of X86Unpckl, there should be an X86Movlhpd here. The problem
1292  // is during lowering, where it's not possible to recognize the load fold
1293  // because it has two uses through a bitcast. One use disappears at isel time
1294  // and the fold opportunity reappears.
1295  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
1296                      (scalar_to_vector (loadf64 addr:$src2)))),
1297            (VMOVHPDrm VR128:$src1, addr:$src2)>;
1298}
1299
1300let Predicates = [HasSSE1] in {
1301  // MOVHPS patterns
1302  def : Pat<(X86Movlhps VR128:$src1,
1303                 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
1304            (MOVHPSrm VR128:$src1, addr:$src2)>;
1305  def : Pat<(X86Movlhps VR128:$src1,
1306                 (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
1307            (MOVHPSrm VR128:$src1, addr:$src2)>;
1308}
1309
1310let Predicates = [HasSSE2] in {
1311  // FIXME: Instead of X86Unpckl, there should be an X86Movlhpd here. The problem
1312  // is during lowering, where it's not possible to recognize the load fold
1313  // because it has two uses through a bitcast. One use disappears at isel time
1314  // and the fold opportunity reappears.
1315  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
1316                      (scalar_to_vector (loadf64 addr:$src2)))),
1317            (MOVHPDrm VR128:$src1, addr:$src2)>;
1318}
1319
1320//===----------------------------------------------------------------------===//
1321// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
1322//===----------------------------------------------------------------------===//
1323
1324let AddedComplexity = 20 in {
1325  def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
1326                                       (ins VR128:$src1, VR128:$src2),
1327                      "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1328                      [(set VR128:$dst,
1329                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
1330                        IIC_SSE_MOV_LH>,
1331                      VEX_4V;
1332  def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
1333                                       (ins VR128:$src1, VR128:$src2),
1334                      "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1335                      [(set VR128:$dst,
1336                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
1337                        IIC_SSE_MOV_LH>,
1338                      VEX_4V;
1339}
1340let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
1341  def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
1342                                       (ins VR128:$src1, VR128:$src2),
1343                      "movlhps\t{$src2, $dst|$dst, $src2}",
1344                      [(set VR128:$dst,
1345                        (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
1346                        IIC_SSE_MOV_LH>;
1347  def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
1348                                       (ins VR128:$src1, VR128:$src2),
1349                      "movhlps\t{$src2, $dst|$dst, $src2}",
1350                      [(set VR128:$dst,
1351                        (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
1352                        IIC_SSE_MOV_LH>;
1353}
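
// Illustrative semantics of the two register forms above (AT&T syntax):
//
//   movlhps %xmm1, %xmm0    copy the low 64 bits of %xmm1 into the high
//                           64 bits of %xmm0
//   movhlps %xmm1, %xmm0    copy the high 64 bits of %xmm1 into the low
//                           64 bits of %xmm0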
1354
1355let Predicates = [HasAVX] in {
1356  // MOVLHPS patterns
1357  def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
1358            (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
1359  def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
1360            (VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
1361
1362  // MOVHLPS patterns
1363  def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
1364            (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
1365}
1366
1367let Predicates = [HasSSE1] in {
1368  // MOVLHPS patterns
1369  def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
1370            (MOVLHPSrr VR128:$src1, VR128:$src2)>;
1371  def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
1372            (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
1373
1374  // MOVHLPS patterns
1375  def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
1376            (MOVHLPSrr VR128:$src1, VR128:$src2)>;
1377}
1378
1379//===----------------------------------------------------------------------===//
1380// SSE 1 & 2 - Conversion Instructions
1381//===----------------------------------------------------------------------===//
1382
1383def SSE_CVT_PD : OpndItins<
1384  IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM
1385>;
1386
1387def SSE_CVT_PS : OpndItins<
1388  IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM
1389>;
1390
1391def SSE_CVT_Scalar : OpndItins<
1392  IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM
1393>;
1394
1395def SSE_CVT_SS2SI_32 : OpndItins<
1396  IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM
1397>;
1398
1399def SSE_CVT_SS2SI_64 : OpndItins<
1400  IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM
1401>;
1402
1403def SSE_CVT_SD2SI : OpndItins<
1404  IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM
1405>;
1406
1407multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
1408                     SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
1409                     string asm, OpndItins itins> {
1410  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
1411                        [(set DstRC:$dst, (OpNode SrcRC:$src))],
1412                        itins.rr>;
1413  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
1414                        [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
1415                        itins.rm>;
1416}
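
// Illustrative sketch: each instantiation of this multiclass yields a
// register form and a load-folding form. For the CVTTSS2SI instantiation
// below this is roughly (AT&T syntax, registers arbitrary)
//
//   CVTTSS2SIrr   cvttss2si %xmm0, %eax     fp_to_sint of a register operand
//   CVTTSS2SIrm   cvttss2si (%rdi), %eax    the same with the f32 load folded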
1417
1418multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
1419                         SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
1420                         string asm, Domain d, OpndItins itins> {
1421  def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
1422                        [(set DstRC:$dst, (OpNode SrcRC:$src))],
1423                        itins.rr, d>;
1424  def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
1425                        [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
1426                        itins.rm, d>;
1427}
1428
1429multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
1430                          X86MemOperand x86memop, string asm> {
1431let neverHasSideEffects = 1 in {
1432  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
1433              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
1434  let mayLoad = 1 in
1435  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
1436              (ins DstRC:$src1, x86memop:$src),
1437              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
1438} // neverHasSideEffects = 1
1439}
1440
1441defm VCVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
1442                                "cvttss2si\t{$src, $dst|$dst, $src}",
1443                                SSE_CVT_SS2SI_32>,
1444                                XS, VEX, VEX_LIG;
1445defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
1446                                "cvttss2si\t{$src, $dst|$dst, $src}",
1447                                SSE_CVT_SS2SI_64>,
1448                                XS, VEX, VEX_W, VEX_LIG;
1449defm VCVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
1450                                "cvttsd2si\t{$src, $dst|$dst, $src}",
1451                                SSE_CVT_SD2SI>,
1452                                XD, VEX, VEX_LIG;
1453defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
1454                                "cvttsd2si\t{$src, $dst|$dst, $src}",
1455                                SSE_CVT_SD2SI>,
1456                                XD, VEX, VEX_W, VEX_LIG;
1457
1458// The assembler can recognize rr 64-bit instructions by seeing an rxx
1459// register, but the same isn't true when only memory operands are used, so
1460// provide explicit "l" and "q" assembly forms to address this where it is
1461// appropriate to do so.
1462defm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">,
1463                                  XS, VEX_4V, VEX_LIG;
1464defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
1465                                  XS, VEX_4V, VEX_W, VEX_LIG;
1466defm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">,
1467                                  XD, VEX_4V, VEX_LIG;
1468defm VCVTSI2SDL  : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
1469                                  XD, VEX_4V, VEX_LIG;
1470defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
1471                                  XD, VEX_4V, VEX_W, VEX_LIG;
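
// Illustrative examples of the forms above (AT&T operand order, registers
// arbitrary):
//
//   cvtsi2ss  %eax, %xmm1, %xmm0      32-bit source, size implied by %eax
//   cvtsi2ss  %rax, %xmm1, %xmm0      64-bit source, size implied by %rax
//   cvtsi2ssq (%rdi), %xmm1, %xmm0    64-bit memory source needs the explicit "q"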
1472
1473let Predicates = [HasAVX], AddedComplexity = 1 in {
1474  def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
1475            (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
1476  def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
1477            (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>;
1478  def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
1479            (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
1480  def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
1481            (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>;
1482
1483  def : Pat<(f32 (sint_to_fp GR32:$src)),
1484            (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
1485  def : Pat<(f32 (sint_to_fp GR64:$src)),
1486            (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>;
1487  def : Pat<(f64 (sint_to_fp GR32:$src)),
1488            (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
1489  def : Pat<(f64 (sint_to_fp GR64:$src)),
1490            (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>;
1491}
1492
1493defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
1494                      "cvttss2si\t{$src, $dst|$dst, $src}",
1495                      SSE_CVT_SS2SI_32>, XS;
1496defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
1497                      "cvttss2si{q}\t{$src, $dst|$dst, $src}",
1498                      SSE_CVT_SS2SI_64>, XS, REX_W;
1499defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
1500                      "cvttsd2si\t{$src, $dst|$dst, $src}",
1501                      SSE_CVT_SD2SI>, XD;
1502defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
1503                      "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
1504                      SSE_CVT_SD2SI>, XD, REX_W;
1505defm CVTSI2SS  : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
1506                      "cvtsi2ss\t{$src, $dst|$dst, $src}",
1507                      SSE_CVT_Scalar>, XS;
1508defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
1509                      "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
1510                      SSE_CVT_Scalar>, XS, REX_W;
1511defm CVTSI2SD  : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
1512                      "cvtsi2sd\t{$src, $dst|$dst, $src}",
1513                      SSE_CVT_Scalar>, XD;
1514defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
1515                      "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
1516                      SSE_CVT_Scalar>, XD, REX_W;
1517
1518// Conversion Instructions Intrinsics - Match intrinsics which expect MM
1519// and/or XMM operand(s).
1520
1521multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
1522                         Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
1523                         string asm, OpndItins itins> {
1524  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
1525              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
1526              [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>;
1527  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
1528              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
1529              [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm>;
1530}
1531
1532multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
1533                    RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
1534                    PatFrag ld_frag, string asm, OpndItins itins,
1535                    bit Is2Addr = 1> {
1536  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
1537              !if(Is2Addr,
1538                  !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
1539                  !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
1540              [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
1541              itins.rr>;
1542  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
1543              (ins DstRC:$src1, x86memop:$src2),
1544              !if(Is2Addr,
1545                  !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
1546                  !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
1547              [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
1548              itins.rm>;
1549}
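
// Illustrative note: the Is2Addr bit only switches the printed assembly
// string between the tied two-operand SSE form and the three-operand AVX
// form, i.e. roughly
//
//   Is2Addr = 1:  "<asm>\t{$src2, $dst|$dst, $src2}"
//   Is2Addr = 0:  "<asm>\t{$src2, $src1, $dst|$dst, $src1, $src2}"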
1550
1551defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
1552                  f128mem, load, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
1553defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
1554                  int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si",
1555                  SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
1556
1557defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
1558                f128mem, load, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD;
1559defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
1560                  f128mem, load, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
1561
1562
1563defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
1564          int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss",
1565          SSE_CVT_Scalar, 0>, XS, VEX_4V;
1566defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
1567          int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss",
1568          SSE_CVT_Scalar, 0>, XS, VEX_4V,
1569          VEX_W;
1570defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
1571          int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd",
1572          SSE_CVT_Scalar, 0>, XD, VEX_4V;
1573defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
1574          int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd",
1575          SSE_CVT_Scalar, 0>, XD,
1576          VEX_4V, VEX_W;
1577
1578let Constraints = "$src1 = $dst" in {
1579  defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
1580                        int_x86_sse_cvtsi2ss, i32mem, loadi32,
1581                        "cvtsi2ss", SSE_CVT_Scalar>, XS;
1582  defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
1583                        int_x86_sse_cvtsi642ss, i64mem, loadi64,
1584                        "cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W;
1585  defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
1586                        int_x86_sse2_cvtsi2sd, i32mem, loadi32,
1587                        "cvtsi2sd", SSE_CVT_Scalar>, XD;
1588  defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
1589                        int_x86_sse2_cvtsi642sd, i64mem, loadi64,
1590                        "cvtsi2sd", SSE_CVT_Scalar>, XD, REX_W;
1591}
1592
1593/// SSE 1 Only
1594
1595// Aliases for intrinsics
1596defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
1597                                    f32mem, load, "cvttss2si",
1598                                    SSE_CVT_SS2SI_32>, XS, VEX;
1599defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
1600                                    int_x86_sse_cvttss2si64, f32mem, load,
1601                                    "cvttss2si", SSE_CVT_SS2SI_64>,
1602                                    XS, VEX, VEX_W;
1603defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
1604                                    f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>,
1605                                    XD, VEX;
1606defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
1607                                    int_x86_sse2_cvttsd2si64, f128mem, load,
1608                                    "cvttsd2si", SSE_CVT_SD2SI>,
1609                                    XD, VEX, VEX_W;
1610defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
1611                                    f32mem, load, "cvttss2si",
1612                                    SSE_CVT_SS2SI_32>, XS;
1613defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
1614                                    int_x86_sse_cvttss2si64, f32mem, load,
1615                                    "cvttss2si{q}", SSE_CVT_SS2SI_64>,
1616                                    XS, REX_W;
1617defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
1618                                    f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>,
1619                                    XD;
1620defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
1621                                    int_x86_sse2_cvttsd2si64, f128mem, load,
1622                                    "cvttsd2si{q}", SSE_CVT_SD2SI>,
1623                                    XD, REX_W;
1624
1625let Pattern = []<dag> in {
1626defm VCVTSS2SI   : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
1627                               "cvtss2si{l}\t{$src, $dst|$dst, $src}",
1628                               SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
1629defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
1630                               "cvtss2si\t{$src, $dst|$dst, $src}",
1631                               SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
1632defm VCVTDQ2PS   : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
1633                               "cvtdq2ps\t{$src, $dst|$dst, $src}",
1634                               SSEPackedSingle, SSE_CVT_PS>, TB, VEX;
1635defm VCVTDQ2PSY  : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
1636                               "cvtdq2ps\t{$src, $dst|$dst, $src}",
1637                               SSEPackedSingle, SSE_CVT_PS>, TB, VEX;
1638}
1639
1640let Pattern = []<dag> in {
1641defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
1642                          "cvtss2si{l}\t{$src, $dst|$dst, $src}",
1643                          SSE_CVT_SS2SI_32>, XS;
1644defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
1645                          "cvtss2si{q}\t{$src, $dst|$dst, $src}",
1646                          SSE_CVT_SS2SI_64>, XS, REX_W;
1647defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
1648                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
1649                            SSEPackedSingle, SSE_CVT_PS>,
1650                            TB; /* PD SSE3 form is available */
1651}
1652
1653let Predicates = [HasAVX] in {
1654  def : Pat<(int_x86_sse_cvtss2si VR128:$src),
1655            (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
1656  def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
1657            (VCVTSS2SIrm addr:$src)>;
1658  def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
1659            (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
1660  def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
1661            (VCVTSS2SI64rm addr:$src)>;
1662}
1663
1664let Predicates = [HasSSE1] in {
1665  def : Pat<(int_x86_sse_cvtss2si VR128:$src),
1666            (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
1667  def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
1668            (CVTSS2SIrm addr:$src)>;
1669  def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
1670            (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
1671  def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
1672            (CVTSS2SI64rm addr:$src)>;
1673}
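
// Illustrative note: the cvtss2si intrinsics take a whole v4f32, while the
// (V)CVTSS2SI definitions above operate on FR32, so the register patterns
// peel off the scalar subregister (sub_ss) first; the emitted code is still
// just, e.g., "cvtss2si %xmm0, %eax".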
1674
1675/// SSE 2 Only
1676
1677// Convert scalar double to scalar single
1678def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
1679                       (ins FR64:$src1, FR64:$src2),
1680                      "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
1681                      IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG;
1682let mayLoad = 1 in
1683def VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
1684                       (ins FR64:$src1, f64mem:$src2),
1685                      "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1686                      [], IIC_SSE_CVT_Scalar_RM>,
1687                      XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
1688
1689def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
1690          Requires<[HasAVX]>;
1691
1692def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
1693                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
1694                      [(set FR32:$dst, (fround FR64:$src))],
1695                      IIC_SSE_CVT_Scalar_RR>;
1696def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
1697                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
1698                      [(set FR32:$dst, (fround (loadf64 addr:$src)))],
1699                      IIC_SSE_CVT_Scalar_RM>,
1700                      XD,
1701                  Requires<[HasSSE2, OptForSize]>;
1702
1703defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
1704                      int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss",
1705                      SSE_CVT_Scalar, 0>,
1706                      XS, VEX_4V;
1707let Constraints = "$src1 = $dst" in
1708defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
1709                      int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss",
1710                      SSE_CVT_Scalar>, XS;
1711
1712// Convert scalar single to scalar double
1713// SSE2 instructions with XS prefix
1714def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
1715                    (ins FR32:$src1, FR32:$src2),
1716                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1717                    [], IIC_SSE_CVT_Scalar_RR>,
1718                    XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
1719let mayLoad = 1 in
1720def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
1721                    (ins FR32:$src1, f32mem:$src2),
1722                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1723                    [], IIC_SSE_CVT_Scalar_RM>,
1724                    XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
1725
1726let Predicates = [HasAVX] in {
1727  def : Pat<(f64 (fextend FR32:$src)),
1728            (VCVTSS2SDrr FR32:$src, FR32:$src)>;
1729  def : Pat<(fextend (loadf32 addr:$src)),
1730            (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
1731  def : Pat<(extloadf32 addr:$src),
1732            (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>;
1733}
1734
1735def : Pat<(extloadf32 addr:$src),
1736          (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (MOVSSrm addr:$src))>,
1737          Requires<[HasAVX, OptForSpeed]>;
1738
1739def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
1740                   "cvtss2sd\t{$src, $dst|$dst, $src}",
1741                   [(set FR64:$dst, (fextend FR32:$src))],
1742                   IIC_SSE_CVT_Scalar_RR>, XS,
1743                 Requires<[HasSSE2]>;
1744def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
1745                   "cvtss2sd\t{$src, $dst|$dst, $src}",
1746                   [(set FR64:$dst, (extloadf32 addr:$src))],
1747                   IIC_SSE_CVT_Scalar_RM>, XS,
1748                 Requires<[HasSSE2, OptForSize]>;
1749
1750// extload f32 -> f64.  This matches load+fextend because we have a hack in
1751// the isel (PreprocessForFPConvert) that can introduce loads after dag
1752// combine.
1753// Since these loads aren't folded into the fextend, we have to match the
1754// load+fextend combination explicitly here.
1755def : Pat<(fextend (loadf32 addr:$src)),
1756          (CVTSS2SDrm addr:$src)>, Requires<[HasSSE2]>;
1757def : Pat<(extloadf32 addr:$src),
1758          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
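
// Illustrative sketch of the two strategies above (AT&T syntax, registers
// arbitrary):
//
//   OptForSize:   cvtss2sd (%rdi), %xmm0        load folded into the convert
//   OptForSpeed:  movss    (%rdi), %xmm1
//                 cvtss2sd %xmm1, %xmm0
//
// presumably preferring the two-instruction form for speed because MOVSS
// from memory zeroes the upper lanes rather than merging into a previous
// value of the register.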
1759
1760def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
1761                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
1762                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1763                    [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
1764                                       VR128:$src2))],
1765                                       IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V,
1766                    Requires<[HasAVX]>;
1767def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
1768                      (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
1769                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1770                    [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
1771                                       (load addr:$src2)))],
1772                                       IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V,
1773                    Requires<[HasAVX]>;
1774let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
1775def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
1776                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
1777                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
1778                    [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
1779                                       VR128:$src2))],
1780                                       IIC_SSE_CVT_Scalar_RR>, XS,
1781                    Requires<[HasSSE2]>;
1782def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
1783                      (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
1784                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
1785                    [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
1786                                       (load addr:$src2)))],
1787                                       IIC_SSE_CVT_Scalar_RM>, XS,
1788                    Requires<[HasSSE2]>;
1789}
1790
1791// Convert doubleword to packed single/double fp
1792// SSE2 instructions without OpSize prefix
1793def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1794                       "vcvtdq2ps\t{$src, $dst|$dst, $src}",
1795                       [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))],
1796                       IIC_SSE_CVT_PS_RR>,
1797                     TB, VEX, Requires<[HasAVX]>;
1798def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
1799                      "vcvtdq2ps\t{$src, $dst|$dst, $src}",
1800                      [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
1801                                        (bitconvert (memopv2i64 addr:$src))))],
1802                                        IIC_SSE_CVT_PS_RM>,
1803                     TB, VEX, Requires<[HasAVX]>;
1804def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1805                       "cvtdq2ps\t{$src, $dst|$dst, $src}",
1806                       [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))],
1807                       IIC_SSE_CVT_PS_RR>,
1808                     TB, Requires<[HasSSE2]>;
1809def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
1810                      "cvtdq2ps\t{$src, $dst|$dst, $src}",
1811                      [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
1812                                        (bitconvert (memopv2i64 addr:$src))))],
1813                                        IIC_SSE_CVT_PS_RM>,
1814                     TB, Requires<[HasSSE2]>;
1815
1816// FIXME: why is the non-intrinsic version described as SSE3?
1817// SSE2 instructions with XS prefix
1818def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1819                       "vcvtdq2pd\t{$src, $dst|$dst, $src}",
1820                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
1821                       IIC_SSE_CVT_PD_RR>,
1822                     XS, VEX, Requires<[HasAVX]>;
1823def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
1824                       "vcvtdq2pd\t{$src, $dst|$dst, $src}",
1825                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
1826                                        (bitconvert (memopv2i64 addr:$src))))],
1827                                        IIC_SSE_CVT_PD_RM>,
1828                     XS, VEX, Requires<[HasAVX]>;
1829def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1830                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
1831                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
1832                       IIC_SSE_CVT_PD_RR>,
1833                     XS, Requires<[HasSSE2]>;
1834def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
1835                     "cvtdq2pd\t{$src, $dst|$dst, $src}",
1836                     [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
1837                                        (bitconvert (memopv2i64 addr:$src))))],
1838                                        IIC_SSE_CVT_PD_RM>,
1839                     XS, Requires<[HasSSE2]>;
1840
1841
1842// Convert packed single/double fp to doubleword
1843def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1844                       "cvtps2dq\t{$src, $dst|$dst, $src}", [],
1845                       IIC_SSE_CVT_PS_RR>, VEX;
1846def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1847                       "cvtps2dq\t{$src, $dst|$dst, $src}", [],
1848                       IIC_SSE_CVT_PS_RM>, VEX;
1849def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
1850                        "cvtps2dq\t{$src, $dst|$dst, $src}", [],
1851                        IIC_SSE_CVT_PS_RR>, VEX;
1852def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
1853                        "cvtps2dq\t{$src, $dst|$dst, $src}", [],
1854                        IIC_SSE_CVT_PS_RM>, VEX;
1855def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1856                     "cvtps2dq\t{$src, $dst|$dst, $src}", [],
1857                     IIC_SSE_CVT_PS_RR>;
1858def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1859                     "cvtps2dq\t{$src, $dst|$dst, $src}", [],
1860                     IIC_SSE_CVT_PS_RM>;
1861
1862def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1863                        "cvtps2dq\t{$src, $dst|$dst, $src}",
1864                        [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
1865                        IIC_SSE_CVT_PS_RR>,
1866                        VEX;
1867def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
1868                         (ins f128mem:$src),
1869                         "cvtps2dq\t{$src, $dst|$dst, $src}",
1870                         [(set VR128:$dst, (int_x86_sse2_cvtps2dq
1871                                            (memop addr:$src)))],
1872                                            IIC_SSE_CVT_PS_RM>, VEX;
1873def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1874                        "cvtps2dq\t{$src, $dst|$dst, $src}",
1875                        [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
1876                        IIC_SSE_CVT_PS_RR>;
1877def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1878                         "cvtps2dq\t{$src, $dst|$dst, $src}",
1879                         [(set VR128:$dst, (int_x86_sse2_cvtps2dq
1880                                            (memop addr:$src)))],
1881                                            IIC_SSE_CVT_PS_RM>;
1882
1883// SSE2 packed instructions with XD prefix
1884def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1885                       "vcvtpd2dq\t{$src, $dst|$dst, $src}",
1886                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
1887                       IIC_SSE_CVT_PD_RR>,
1888                     XD, VEX, Requires<[HasAVX]>;
1889def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1890                       "vcvtpd2dq\t{$src, $dst|$dst, $src}",
1891                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
1892                                          (memop addr:$src)))],
1893                                          IIC_SSE_CVT_PD_RM>,
1894                     XD, VEX, Requires<[HasAVX]>;
1895def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1896                       "cvtpd2dq\t{$src, $dst|$dst, $src}",
1897                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
1898                       IIC_SSE_CVT_PD_RR>,
1899                     XD, Requires<[HasSSE2]>;
1900def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1901                       "cvtpd2dq\t{$src, $dst|$dst, $src}",
1902                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
1903                                          (memop addr:$src)))],
1904                                          IIC_SSE_CVT_PD_RM>,
1905                     XD, Requires<[HasSSE2]>;
1906
1907
1908// Convert with truncation packed single/double fp to doubleword
1909// SSE2 packed instructions with XS prefix
1910def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1911                        "cvttps2dq\t{$src, $dst|$dst, $src}",
1912                        [(set VR128:$dst,
1913                          (int_x86_sse2_cvttps2dq VR128:$src))],
1914                          IIC_SSE_CVT_PS_RR>, VEX;
1915def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1916                        "cvttps2dq\t{$src, $dst|$dst, $src}",
1917                        [(set VR128:$dst, (int_x86_sse2_cvttps2dq
1918                                           (memop addr:$src)))],
1919                                           IIC_SSE_CVT_PS_RM>, VEX;
1920def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
1921                         "cvttps2dq\t{$src, $dst|$dst, $src}",
1922                         [(set VR256:$dst,
1923                           (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
1924                           IIC_SSE_CVT_PS_RR>, VEX;
1925def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
1926                         "cvttps2dq\t{$src, $dst|$dst, $src}",
1927                         [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
1928                                            (memopv8f32 addr:$src)))],
1929                                            IIC_SSE_CVT_PS_RM>, VEX;
1930
1931def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1932                      "cvttps2dq\t{$src, $dst|$dst, $src}",
1933                      [(set VR128:$dst,
1934                            (int_x86_sse2_cvttps2dq VR128:$src))],
1935                            IIC_SSE_CVT_PS_RR>;
1936def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1937                      "cvttps2dq\t{$src, $dst|$dst, $src}",
1938                      [(set VR128:$dst,
1939                            (int_x86_sse2_cvttps2dq (memop addr:$src)))],
1940                            IIC_SSE_CVT_PS_RM>;
1941
1942let Predicates = [HasAVX] in {
1943  def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
1944            (Int_VCVTDQ2PSrr VR128:$src)>;
1945  def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
1946            (Int_VCVTDQ2PSrm addr:$src)>;
1947
1948  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
1949            (VCVTTPS2DQrr VR128:$src)>;
1950  def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
1951            (VCVTTPS2DQrm addr:$src)>;
1952
1953  def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
1954            (VCVTDQ2PSYrr VR256:$src)>;
1955  def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (memopv4i64 addr:$src)))),
1956            (VCVTDQ2PSYrm addr:$src)>;
1957
1958  def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
1959            (VCVTTPS2DQYrr VR256:$src)>;
1960  def : Pat<(v8i32 (fp_to_sint (memopv8f32 addr:$src))),
1961            (VCVTTPS2DQYrm addr:$src)>;
1962}
1963
1964let Predicates = [HasSSE2] in {
1965  def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
1966            (Int_CVTDQ2PSrr VR128:$src)>;
1967  def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
1968            (Int_CVTDQ2PSrm addr:$src)>;
1969
1970  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
1971            (CVTTPS2DQrr VR128:$src)>;
1972  def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
1973            (CVTTPS2DQrm addr:$src)>;
1974}
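
// Illustrative sketch: with the patterns above, generic vector conversions
// such as v4i32 -> v4f32 (sint_to_fp) and v4f32 -> v4i32 (fp_to_sint) select
// the packed converts directly, e.g. (AT&T syntax)
//
//   cvtdq2ps  %xmm0, %xmm0
//   cvttps2dq %xmm0, %xmm0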
1975
1976def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1977                        "cvttpd2dq\t{$src, $dst|$dst, $src}",
1978                        [(set VR128:$dst,
1979                              (int_x86_sse2_cvttpd2dq VR128:$src))],
1980                              IIC_SSE_CVT_PD_RR>, VEX;
1981let isCodeGenOnly = 1 in
1982def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
1983                        "cvttpd2dq\t{$src, $dst|$dst, $src}",
1984                        [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
1985                                               (memop addr:$src)))],
1986                                               IIC_SSE_CVT_PD_RM>, VEX;
1987def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
1988                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
1989                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
1990                      IIC_SSE_CVT_PD_RR>;
1991def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
1992                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
1993                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
1994                                        (memop addr:$src)))],
1995                                        IIC_SSE_CVT_PD_RM>;
1996
1997// The assembler can recognize rr 256-bit instructions by seeing a ymm
1998// register, but the same isn't true when using memory operands instead.
1999// Provide other assembly rr and rm forms to address this explicitly.
2000def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
2001                          "cvttpd2dq\t{$src, $dst|$dst, $src}", [],
2002                          IIC_SSE_CVT_PD_RR>, VEX;
2003
2004// XMM only
2005def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2006                         "cvttpd2dqx\t{$src, $dst|$dst, $src}", [],
2007                         IIC_SSE_CVT_PD_RR>, VEX;
2008def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
2009                         "cvttpd2dqx\t{$src, $dst|$dst, $src}", [],
2010                         IIC_SSE_CVT_PD_RM>, VEX;
2011
2012// YMM only
2013def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
2014                         "cvttpd2dqy\t{$src, $dst|$dst, $src}", [],
2015                         IIC_SSE_CVT_PD_RR>, VEX;
2016def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
2017                         "cvttpd2dqy\t{$src, $dst|$dst, $src}", [],
2018                         IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
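
// Illustrative examples of why the suffixed forms exist (AT&T syntax,
// registers arbitrary):
//
//   vcvttpd2dq  %ymm1, %xmm0     source width is clear from %ymm1
//   vcvttpd2dqx (%rdi), %xmm0    128-bit memory source made explicit
//   vcvttpd2dqy (%rdi), %xmm0    256-bit memory source made explicit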
2019
2020// Convert packed single to packed double
2021let Predicates = [HasAVX] in {
2022// SSE2 instructions without OpSize prefix
2023def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2024                     "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
2025                     IIC_SSE_CVT_PD_RR>, TB, VEX;
2026def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
2027                     "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
2028                     IIC_SSE_CVT_PD_RM>, TB, VEX;
2029def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
2030                     "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
2031                     IIC_SSE_CVT_PD_RR>, TB, VEX;
2032def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
2033                     "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
2034                     IIC_SSE_CVT_PD_RM>, TB, VEX;
2035}
2036def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2037                       "cvtps2pd\t{$src, $dst|$dst, $src}", [],
2038                       IIC_SSE_CVT_PD_RR>, TB;
2039def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
2040                       "cvtps2pd\t{$src, $dst|$dst, $src}", [],
2041                       IIC_SSE_CVT_PD_RM>, TB;
2042
2043def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2044                       "vcvtps2pd\t{$src, $dst|$dst, $src}",
2045                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
2046                       IIC_SSE_CVT_PD_RR>,
2047                     TB, VEX, Requires<[HasAVX]>;
2048def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
2049                       "vcvtps2pd\t{$src, $dst|$dst, $src}",
2050                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd
2051                                          (load addr:$src)))],
2052                                          IIC_SSE_CVT_PD_RM>,
2053                     TB, VEX, Requires<[HasAVX]>;
2054def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2055                       "cvtps2pd\t{$src, $dst|$dst, $src}",
2056                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
2057                       IIC_SSE_CVT_PD_RR>,
2058                     TB, Requires<[HasSSE2]>;
2059def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
2060                       "cvtps2pd\t{$src, $dst|$dst, $src}",
2061                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd
2062                                          (load addr:$src)))],
2063                                          IIC_SSE_CVT_PD_RM>,
2064                     TB, Requires<[HasSSE2]>;
2065
2066// Convert packed double to packed single
2067// The assembler can recognize rr 256-bit instructions by seeing a ymm
2068// register, but the same isn't true when using memory operands instead.
2069// Provide other assembly rr and rm forms to address this explicitly.
2070def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2071                       "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
2072                       IIC_SSE_CVT_PD_RR>, VEX;
2073def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
2074                         "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
2075                         IIC_SSE_CVT_PD_RR>, VEX;
2076
2077// XMM only
2078def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2079                        "cvtpd2psx\t{$src, $dst|$dst, $src}", [],
2080                        IIC_SSE_CVT_PD_RR>, VEX;
2081def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
2082                        "cvtpd2psx\t{$src, $dst|$dst, $src}", [],
2083                        IIC_SSE_CVT_PD_RM>, VEX;
2084
2085// YMM only
2086def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
2087                        "cvtpd2psy\t{$src, $dst|$dst, $src}", [],
2088                        IIC_SSE_CVT_PD_RR>, VEX;
2089def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
2090                        "cvtpd2psy\t{$src, $dst|$dst, $src}", [],
2091                        IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
2092def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2093                     "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
2094                     IIC_SSE_CVT_PD_RR>;
2095def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
2096                     "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
2097                     IIC_SSE_CVT_PD_RM>;
2098
2099
2100def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2101                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
2102                        [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
2103                        IIC_SSE_CVT_PD_RR>;
2104def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
2105                         (ins f128mem:$src),
2106                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
2107                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
2108                                            (memop addr:$src)))],
2109                                            IIC_SSE_CVT_PD_RM>;
2110def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
2111                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
2112                        [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
2113                        IIC_SSE_CVT_PD_RR>;
2114def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
2115                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
2116                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
2117                                            (memop addr:$src)))],
2118                                            IIC_SSE_CVT_PD_RM>;
2119
2120// AVX 256-bit register conversion intrinsics
2121// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
2122// whenever possible to avoid declaring two versions of each one.
2123def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
2124          (VCVTDQ2PSYrr VR256:$src)>;
2125def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
2126          (VCVTDQ2PSYrm addr:$src)>;
2127
2128def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
2129          (VCVTPD2PSYrr VR256:$src)>;
2130def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
2131          (VCVTPD2PSYrm addr:$src)>;
2132
2133def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
2134          (VCVTPS2DQYrr VR256:$src)>;
2135def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
2136          (VCVTPS2DQYrm addr:$src)>;
2137
2138def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
2139          (VCVTPS2PDYrr VR128:$src)>;
2140def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
2141          (VCVTPS2PDYrm addr:$src)>;
2142
2143def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
2144          (VCVTTPD2DQYrr VR256:$src)>;
2145def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
2146          (VCVTTPD2DQYrm addr:$src)>;
2147
2148// Match fround and fextend for 128/256-bit conversions
2149def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
2150          (VCVTPD2PSYrr VR256:$src)>;
2151def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
2152          (VCVTPD2PSYrm addr:$src)>;
2153
2154def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
2155          (VCVTPS2PDYrr VR128:$src)>;
2156def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
2157          (VCVTPS2PDYrm addr:$src)>;
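
// Illustrative sketch: with AVX, a v4f64 -> v4f32 fround is matched by the
// patterns above and selects VCVTPD2PSYrr, and a v4f32 -> v4f64 fextend
// selects VCVTPS2PDYrr, i.e. roughly (AT&T syntax)
//
//   vcvtpd2ps %ymm1, %xmm0
//   vcvtps2pd %xmm1, %ymm0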
2158
2159//===----------------------------------------------------------------------===//
2160// SSE 1 & 2 - Compare Instructions
2161//===----------------------------------------------------------------------===//
2162
2163// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
2164multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
2165                            Operand CC, SDNode OpNode, ValueType VT,
2166                            PatFrag ld_frag, string asm, string asm_alt,
2167                            OpndItins itins> {
2168  def rr : SIi8<0xC2, MRMSrcReg,
2169                (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
2170                [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
2171                itins.rr>;
2172  def rm : SIi8<0xC2, MRMSrcMem,
2173                (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
2174                [(set RC:$dst, (OpNode (VT RC:$src1),
2175                                         (ld_frag addr:$src2), imm:$cc))],
2176                                         itins.rm>;
2177
2178  // Accept explicit immediate argument form instead of comparison code.
2179  let neverHasSideEffects = 1 in {
2180    def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
2181                      (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [],
2182                      IIC_SSE_ALU_F32S_RR>;
2183    let mayLoad = 1 in
2184    def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
2185                      (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [],
2186                      IIC_SSE_ALU_F32S_RM>;
2187  }
2188}
2189
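// Note (illustration): each defm below expands the multiclass above into rr/rm
// plus rr_alt/rm_alt variants, named by concatenation (e.g. CMPSSrr,
// CMPSSrm_alt). The ${cc} forms provide pseudo-mnemonics such as
// "cmpltss %xmm1, %xmm0" (predicate 1), while the _alt forms let the assembler
// accept an explicit immediate, e.g. "cmpss $3, %xmm1, %xmm0" (unordered).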
2190defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmpss, f32, loadf32,
2191                 "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2192                 "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
2193                 SSE_ALU_F32S>,
2194                 XS, VEX_4V, VEX_LIG;
2195defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmpsd, f64, loadf64,
2196                 "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2197                 "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
2198                 SSE_ALU_F32S>, // same latency as 32 bit compare
2199                 XD, VEX_4V, VEX_LIG;
2200
2201let Constraints = "$src1 = $dst" in {
2202  defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmpss, f32, loadf32,
2203                  "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
2204                  "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S>,
2205                  XS;
2206  defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmpsd, f64, loadf64,
2207                  "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
2208                  "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
2209                  SSE_ALU_F32S>, // same latency as 32 bit compare
2210                  XD;
2211}
2212
2213multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
2214                         Intrinsic Int, string asm, OpndItins itins> {
2215  def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
2216                      (ins VR128:$src1, VR128:$src, CC:$cc), asm,
2217                        [(set VR128:$dst, (Int VR128:$src1,
2218                                               VR128:$src, imm:$cc))],
2219                                               itins.rr>;
2220  def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
2221                      (ins VR128:$src1, x86memop:$src, CC:$cc), asm,
2222                        [(set VR128:$dst, (Int VR128:$src1,
2223                                               (load addr:$src), imm:$cc))],
2224                                               itins.rm>;
2225}
2226
2227// Aliases to match intrinsics which expect XMM operand(s).
2228defm Int_VCMPSS  : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss,
2229                     "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
2230                     SSE_ALU_F32S>,
2231                     XS, VEX_4V;
2232defm Int_VCMPSD  : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd,
2233                     "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
2234                     SSE_ALU_F32S>, // same latency as f32
2235                     XD, VEX_4V;
2236let Constraints = "$src1 = $dst" in {
2237  defm Int_CMPSS  : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss,
2238                       "cmp${cc}ss\t{$src, $dst|$dst, $src}",
2239                       SSE_ALU_F32S>, XS;
2240  defm Int_CMPSD  : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
2241                       "cmp${cc}sd\t{$src, $dst|$dst, $src}",
2242                       SSE_ALU_F32S>, // same latency as f32
2243                       XD;
2244}
2245
2246
2247// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
2248multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
2249                            ValueType vt, X86MemOperand x86memop,
2250                            PatFrag ld_frag, string OpcodeStr, Domain d> {
2251  def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
2252                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
2253                     [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
2254                     IIC_SSE_COMIS_RR, d>;
2255  def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
2256                     !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
2257                     [(set EFLAGS, (OpNode (vt RC:$src1),
2258                                           (ld_frag addr:$src2)))],
2259                                           IIC_SSE_COMIS_RM, d>;
2260}
2261
2262let Defs = [EFLAGS] in {
2263  defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
2264                                  "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG;
2265  defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
2266                                  "ucomisd", SSEPackedDouble>, TB, OpSize, VEX,
2267                                  VEX_LIG;
2268  let Pattern = []<dag> in {
2269    defm VCOMISS  : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
2270                                    "comiss", SSEPackedSingle>, TB, VEX,
2271                                    VEX_LIG;
2272    defm VCOMISD  : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
2273                                    "comisd", SSEPackedDouble>, TB, OpSize, VEX,
2274                                    VEX_LIG;
2275  }
2276
2277  defm Int_VUCOMISS  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
2278                            load, "ucomiss", SSEPackedSingle>, TB, VEX;
2279  defm Int_VUCOMISD  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
2280                            load, "ucomisd", SSEPackedDouble>, TB, OpSize, VEX;
2281
2282  defm Int_VCOMISS  : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
2283                            load, "comiss", SSEPackedSingle>, TB, VEX;
2284  defm Int_VCOMISD  : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
2285                            load, "comisd", SSEPackedDouble>, TB, OpSize, VEX;
2286  defm UCOMISS  : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
2287                                  "ucomiss", SSEPackedSingle>, TB;
2288  defm UCOMISD  : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
2289                                  "ucomisd", SSEPackedDouble>, TB, OpSize;
2290
2291  let Pattern = []<dag> in {
2292    defm COMISS  : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
2293                                    "comiss", SSEPackedSingle>, TB;
2294    defm COMISD  : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
2295                                    "comisd", SSEPackedDouble>, TB, OpSize;
2296  }
2297
2298  defm Int_UCOMISS  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
2299                              load, "ucomiss", SSEPackedSingle>, TB;
2300  defm Int_UCOMISD  : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
2301                              load, "ucomisd", SSEPackedDouble>, TB, OpSize;
2302
2303  defm Int_COMISS  : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
2304                                  "comiss", SSEPackedSingle>, TB;
2305  defm Int_COMISD  : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
2306                                  "comisd", SSEPackedDouble>, TB, OpSize;
2307} // Defs = [EFLAGS]
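// Note (for reference): (U)COMISS/(U)COMISD compare the low elements and set
// ZF/PF/CF directly: equal -> ZF=1, less-than -> CF=1, greater-than -> all
// three clear, unordered (NaN operand) -> ZF=PF=CF=1; OF/SF/AF are cleared.
// The ordered COMIS* forms also raise the invalid-operation exception for QNaN
// operands, whereas UCOMIS* raise it only for SNaNs.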
2308
2309// sse12_cmp_packed - sse 1 & 2 compare packed instructions
2310multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
2311                            Operand CC, Intrinsic Int, string asm,
2312                            string asm_alt, Domain d> {
2313  def rri : PIi8<0xC2, MRMSrcReg,
2314             (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
2315             [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
2316             IIC_SSE_CMPP_RR, d>;
2317  def rmi : PIi8<0xC2, MRMSrcMem,
2318             (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
2319             [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
2320             IIC_SSE_CMPP_RM, d>;
2321
2322  // Accept explicit immediate argument form instead of comparison code.
2323  let neverHasSideEffects = 1 in {
2324    def rri_alt : PIi8<0xC2, MRMSrcReg,
2325               (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
2326               asm_alt, [], IIC_SSE_CMPP_RR, d>;
2327    def rmi_alt : PIi8<0xC2, MRMSrcMem,
2328               (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
2329               asm_alt, [], IIC_SSE_CMPP_RM, d>;
2330  }
2331}
2332
2333defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps,
2334               "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2335               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
2336               SSEPackedSingle>, TB, VEX_4V;
2337defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd,
2338               "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2339               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
2340               SSEPackedDouble>, TB, OpSize, VEX_4V;
2341defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
2342               "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2343               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
2344               SSEPackedSingle>, TB, VEX_4V;
2345defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
2346               "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2347               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
2348               SSEPackedDouble>, TB, OpSize, VEX_4V;
2349let Constraints = "$src1 = $dst" in {
2350  defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
2351                 "cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
2352                 "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
2353                 SSEPackedSingle>, TB;
2354  defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
2355                 "cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
2356                 "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
2357                 SSEPackedDouble>, TB, OpSize;
2358}
2359
2360let Predicates = [HasAVX] in {
2361def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
2362          (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
2363def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
2364          (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
2365def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
2366          (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
2367def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
2368          (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
2369
2370def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
2371          (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
2372def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
2373          (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
2374def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
2375          (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
2376def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
2377          (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
2378}
2379
2380let Predicates = [HasSSE1] in {
2381def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
2382          (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
2383def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
2384          (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
2385}
2386
2387let Predicates = [HasSSE2] in {
2388def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
2389          (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
2390def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
2391          (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
2392}
2393
2394//===----------------------------------------------------------------------===//
2395// SSE 1 & 2 - Shuffle Instructions
2396//===----------------------------------------------------------------------===//
2397
2398/// sse12_shuffle - sse 1 & 2 shuffle instructions
2399multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
2400                         ValueType vt, string asm, PatFrag mem_frag,
2401                         Domain d, bit IsConvertibleToThreeAddress = 0> {
2402  def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
2403                   (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
2404                   [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
2405                                       (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
2406  let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
2407    def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
2408                   (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
2409                   [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
2410                                       (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
2411}
2412
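// Note (illustration of the immediate encoding assumed by X86Shufp): for
// shufps the 8-bit immediate holds four 2-bit selectors; the low two select
// result elements 0-1 from $src1 and the high two select elements 2-3 from
// $src2, e.g. imm 0x44 gives {src1[0], src1[1], src2[0], src2[1]}. shufpd uses
// two 1-bit selectors: bit 0 picks result[0] from $src1, bit 1 picks result[1]
// from $src2.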
2413defm VSHUFPS  : sse12_shuffle<VR128, f128mem, v4f32,
2414           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
2415           memopv4f32, SSEPackedSingle>, TB, VEX_4V;
2416defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
2417           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
2418           memopv8f32, SSEPackedSingle>, TB, VEX_4V;
2419defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64,
2420           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
2421           memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
2422defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
2423           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
2424           memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
2425
2426let Constraints = "$src1 = $dst" in {
2427  defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
2428                    "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
2429                    memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>,
2430                    TB;
2431  defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
2432                    "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
2433                    memopv2f64, SSEPackedDouble, 1 /* cvt to pshufd */>,
2434                    TB, OpSize;
2435}
2436
2437let Predicates = [HasAVX] in {
2438  def : Pat<(v4i32 (X86Shufp VR128:$src1,
2439                       (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
2440            (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
2441  def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
2442            (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
2443
2444  def : Pat<(v2i64 (X86Shufp VR128:$src1,
2445                       (memopv2i64 addr:$src2), (i8 imm:$imm))),
2446            (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
2447  def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
2448            (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
2449
2450  // 256-bit patterns
2451  def : Pat<(v8i32 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
2452            (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
2453  def : Pat<(v8i32 (X86Shufp VR256:$src1,
2454                      (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
2455            (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;
2456
2457  def : Pat<(v4i64 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
2458            (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
2459  def : Pat<(v4i64 (X86Shufp VR256:$src1,
2460                              (memopv4i64 addr:$src2), (i8 imm:$imm))),
2461            (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
2462}
2463
2464let Predicates = [HasSSE1] in {
2465  def : Pat<(v4i32 (X86Shufp VR128:$src1,
2466                       (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
2467            (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
2468  def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
2469            (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
2470}
2471
2472let Predicates = [HasSSE2] in {
2473  // Generic SHUFPD patterns
2474  def : Pat<(v2i64 (X86Shufp VR128:$src1,
2475                       (memopv2i64 addr:$src2), (i8 imm:$imm))),
2476            (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
2477  def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
2478            (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
2479}
2480
2481//===----------------------------------------------------------------------===//
2482// SSE 1 & 2 - Unpack Instructions
2483//===----------------------------------------------------------------------===//
2484
2485/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave
2486multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
2487                                   PatFrag mem_frag, RegisterClass RC,
2488                                   X86MemOperand x86memop, string asm,
2489                                   Domain d> {
2490    def rr : PI<opc, MRMSrcReg,
2491                (outs RC:$dst), (ins RC:$src1, RC:$src2),
2492                asm, [(set RC:$dst,
2493                           (vt (OpNode RC:$src1, RC:$src2)))],
2494                           IIC_SSE_UNPCK, d>;
2495    def rm : PI<opc, MRMSrcMem,
2496                (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
2497                asm, [(set RC:$dst,
2498                           (vt (OpNode RC:$src1,
2499                                       (mem_frag addr:$src2))))],
2500                                       IIC_SSE_UNPCK, d>;
2501}
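// Note (illustration): X86Unpckl/X86Unpckh interleave the low/high halves of
// the two sources. For v4f32: unpcklps -> {a0,b0,a1,b1}, unpckhps ->
// {a2,b2,a3,b3}; for v2f64: unpcklpd -> {a0,b0}, unpckhpd -> {a1,b1}.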
2502
2503defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
2504      VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2505                     SSEPackedSingle>, TB, VEX_4V;
2506defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
2507      VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2508                     SSEPackedDouble>, TB, OpSize, VEX_4V;
2509defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
2510      VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2511                     SSEPackedSingle>, TB, VEX_4V;
2512defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
2513      VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2514                     SSEPackedDouble>, TB, OpSize, VEX_4V;
2515
2516defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, memopv8f32,
2517      VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2518                     SSEPackedSingle>, TB, VEX_4V;
2519defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, memopv4f64,
2520      VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2521                     SSEPackedDouble>, TB, OpSize, VEX_4V;
2522defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, memopv8f32,
2523      VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2524                     SSEPackedSingle>, TB, VEX_4V;
2525defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, memopv4f64,
2526      VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2527                     SSEPackedDouble>, TB, OpSize, VEX_4V;
2528
2529let Constraints = "$src1 = $dst" in {
2530  defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
2531        VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
2532                       SSEPackedSingle>, TB;
2533  defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
2534        VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
2535                       SSEPackedDouble>, TB, OpSize;
2536  defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
2537        VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
2538                       SSEPackedSingle>, TB;
2539  defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
2540        VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
2541                       SSEPackedDouble>, TB, OpSize;
2542} // Constraints = "$src1 = $dst"
2543
2544let Predicates = [HasAVX], AddedComplexity = 1 in {
2545  // FIXME: Instead of X86Movddup, there should be an X86Unpckl here; the
2546  // problem is during lowering, where it's not possible to recognize the load
2547  // fold because it has two uses through a bitcast. One use disappears at isel
2548  // time and the fold opportunity reappears.
2549  def : Pat<(v2f64 (X86Movddup VR128:$src)),
2550            (VUNPCKLPDrr VR128:$src, VR128:$src)>;
2551}
2552
2553let Predicates = [HasSSE2] in {
2554  // FIXME: Instead of X86Movddup, there should be an X86Unpckl here; the
2555  // problem is during lowering, where it's not possible to recognize the load
2556  // fold because it has two uses through a bitcast. One use disappears at isel
2557  // time and the fold opportunity reappears.
2558  def : Pat<(v2f64 (X86Movddup VR128:$src)),
2559            (UNPCKLPDrr VR128:$src, VR128:$src)>;
2560}
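// Note (illustration): unpcklpd with the same register as both sources yields
// {src[0], src[0]}, which is exactly the movddup result; that is why the
// patterns above can select (V)UNPCKLPDrr $src, $src for X86Movddup.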
2561
2562//===----------------------------------------------------------------------===//
2563// SSE 1 & 2 - Extract Floating-Point Sign mask
2564//===----------------------------------------------------------------------===//
2565
2566/// sse12_extr_sign_mask - SSE 1 & 2 packed FP sign-mask extraction
2567multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
2568                                Domain d> {
2569  def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
2570                !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
2571                     [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>;
2572  def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
2573                !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [],
2574                IIC_SSE_MOVMSK, d>, REX_W;
2575}
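// Note (for reference): movmskps/movmskpd copy the sign bit of each packed
// element into the low bits of the GPR destination and zero the remaining
// bits, e.g. movmskps on {-1.0, 2.0, -3.0, 4.0} produces 0b0101 = 5.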
2576
2577let Predicates = [HasAVX] in {
2578  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
2579                                        "movmskps", SSEPackedSingle>, TB, VEX;
2580  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
2581                                        "movmskpd", SSEPackedDouble>, TB,
2582                                        OpSize, VEX;
2583  defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
2584                                        "movmskps", SSEPackedSingle>, TB, VEX;
2585  defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
2586                                        "movmskpd", SSEPackedDouble>, TB,
2587                                        OpSize, VEX;
2588
2589  def : Pat<(i32 (X86fgetsign FR32:$src)),
2590            (VMOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
2591                                          sub_ss))>;
2592  def : Pat<(i64 (X86fgetsign FR32:$src)),
2593            (VMOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
2594                                          sub_ss))>;
2595  def : Pat<(i32 (X86fgetsign FR64:$src)),
2596            (VMOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
2597                                          sub_sd))>;
2598  def : Pat<(i64 (X86fgetsign FR64:$src)),
2599            (VMOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
2600                                          sub_sd))>;
2601
2602  // Assembler Only
2603  def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
2604             "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
2605             SSEPackedSingle>, TB, VEX;
2606  def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
2607             "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
2608             SSEPackedDouble>, TB,
2609             OpSize, VEX;
2610  def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
2611             "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
2612             SSEPackedSingle>, TB, VEX;
2613  def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
2614             "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
2615             SSEPackedDouble>, TB,
2616             OpSize, VEX;
2617}
2618
2619defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
2620                                     SSEPackedSingle>, TB;
2621defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
2622                                     SSEPackedDouble>, TB, OpSize;
2623
2624def : Pat<(i32 (X86fgetsign FR32:$src)),
2625          (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
2626                                       sub_ss))>, Requires<[HasSSE1]>;
2627def : Pat<(i64 (X86fgetsign FR32:$src)),
2628          (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
2629                                       sub_ss))>, Requires<[HasSSE1]>;
2630def : Pat<(i32 (X86fgetsign FR64:$src)),
2631          (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
2632                                       sub_sd))>, Requires<[HasSSE2]>;
2633def : Pat<(i64 (X86fgetsign FR64:$src)),
2634          (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
2635                                       sub_sd))>, Requires<[HasSSE2]>;
2636
2637//===---------------------------------------------------------------------===//
2638// SSE2 - Packed Integer Logical Instructions
2639//===---------------------------------------------------------------------===//
2640
2641let ExeDomain = SSEPackedInt in { // SSE integer instructions
2642
2643/// PDI_binop_rm - Simple SSE2 binary operator.
2644multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
2645                        ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
2646                        X86MemOperand x86memop,
2647                        OpndItins itins,
2648                        bit IsCommutable = 0,
2649                        bit Is2Addr = 1> {
2650  let isCommutable = IsCommutable in
2651  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
2652       (ins RC:$src1, RC:$src2),
2653       !if(Is2Addr,
2654           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
2655           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
2656       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>;
2657  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
2658       (ins RC:$src1, x86memop:$src2),
2659       !if(Is2Addr,
2660           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
2661           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
2662       [(set RC:$dst, (OpVT (OpNode RC:$src1,
2663                                     (bitconvert (memop_frag addr:$src2)))))],
2664                                     itins.rm>;
2665}
2666} // ExeDomain = SSEPackedInt
2667
2668// These are ordered here to satisfy pattern-ordering requirements with the FP versions.
2669
2670let Predicates = [HasAVX] in {
2671defm VPAND : PDI_binop_rm<0xDB, "vpand", and, v2i64, VR128, memopv2i64,
2672                          i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
2673defm VPOR  : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64,
2674                          i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
2675defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64,
2676                          i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
2677defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64,
2678                          i128mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V;
2679}
2680
2681let Constraints = "$src1 = $dst" in {
2682defm PAND : PDI_binop_rm<0xDB, "pand", and, v2i64, VR128, memopv2i64,
2683                         i128mem, SSE_BIT_ITINS_P, 1>;
2684defm POR  : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64,
2685                         i128mem, SSE_BIT_ITINS_P, 1>;
2686defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64,
2687                         i128mem, SSE_BIT_ITINS_P, 1>;
2688defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64,
2689                          i128mem, SSE_BIT_ITINS_P, 0>;
2690} // Constraints = "$src1 = $dst"
2691
2692let Predicates = [HasAVX2] in {
2693defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64,
2694                           i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
2695defm VPORY  : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64,
2696                           i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
2697defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64,
2698                           i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
2699defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64,
2700                            i256mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V;
2701}
2702
2703//===----------------------------------------------------------------------===//
2704// SSE 1 & 2 - Logical Instructions
2705//===----------------------------------------------------------------------===//
2706
2707/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
2708///
2709multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
2710                                       SDNode OpNode, OpndItins itins> {
2711  defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
2712              FR32, f32, f128mem, memopfsf32, SSEPackedSingle, itins, 0>,
2713              TB, VEX_4V;
2714
2715  defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
2716        FR64, f64, f128mem, memopfsf64, SSEPackedDouble, itins, 0>,
2717        TB, OpSize, VEX_4V;
2718
2719  let Constraints = "$src1 = $dst" in {
2720    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
2721                f32, f128mem, memopfsf32, SSEPackedSingle, itins>,
2722                TB;
2723
2724    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
2725                f64, f128mem, memopfsf64, SSEPackedDouble, itins>,
2726                TB, OpSize;
2727  }
2728}
2729
2730// Alias bitwise logical operations using SSE logical ops on packed FP values.
2731let mayLoad = 0 in {
2732  defm FsAND  : sse12_fp_alias_pack_logical<0x54, "and", X86fand,
2733                SSE_BIT_ITINS_P>;
2734  defm FsOR   : sse12_fp_alias_pack_logical<0x56, "or", X86for,
2735                SSE_BIT_ITINS_P>;
2736  defm FsXOR  : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
2737                SSE_BIT_ITINS_P>;
2738}
2739
2740let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
2741  defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef,
2742                SSE_BIT_ITINS_P>;
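// Note (illustration, assuming the usual scalar lowering): the Fs* forms work
// on FR32/FR64 values but reuse the packed opcodes, so scalar bitwise idioms
// such as fabs (X86fand with a sign-clearing mask) or fneg (X86fxor with a
// sign-bit mask) can be selected to ANDPS/XORPS and friends.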
2743
2744/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
2745///
2746multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
2747                                   SDNode OpNode> {
2748  // In AVX there is no need to add a pattern for the 128-bit logical rr ps
2749  // form, because those operations are all promoted to v2i64 and the patterns
2750  // are covered by the integer version. The pattern is only needed for SSE,
2751  // because v2i64 isn't supported on SSE1, only on SSE2.
2752  defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
2753       !strconcat(OpcodeStr, "ps"), f128mem, [],
2754       [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
2755                                 (memopv2i64 addr:$src2)))], 0, 1>, TB, VEX_4V;
2756
2757  defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
2758       !strconcat(OpcodeStr, "pd"), f128mem,
2759       [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
2760                                 (bc_v2i64 (v2f64 VR128:$src2))))],
2761       [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
2762                                 (memopv2i64 addr:$src2)))], 0>,
2763                                                 TB, OpSize, VEX_4V;
2764  let Constraints = "$src1 = $dst" in {
2765    defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
2766         !strconcat(OpcodeStr, "ps"), f128mem,
2767         [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
2768         [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
2769                                   (memopv2i64 addr:$src2)))]>, TB;
2770
2771    defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
2772         !strconcat(OpcodeStr, "pd"), f128mem,
2773         [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
2774                                   (bc_v2i64 (v2f64 VR128:$src2))))],
2775         [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
2776                                   (memopv2i64 addr:$src2)))]>, TB, OpSize;
2777  }
2778}
2779
2780/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
2781///
2782multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
2783                                     SDNode OpNode> {
2784    defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
2785          !strconcat(OpcodeStr, "ps"), f256mem,
2786          [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
2787          [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
2788                                    (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V;
2789
2790    defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
2791          !strconcat(OpcodeStr, "pd"), f256mem,
2792          [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
2793                                    (bc_v4i64 (v4f64 VR256:$src2))))],
2794          [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
2795                                    (memopv4i64 addr:$src2)))], 0>,
2796                                    TB, OpSize, VEX_4V;
2797}
2798
2799// AVX 256-bit packed logical ops forms
2800defm VAND  : sse12_fp_packed_logical_y<0x54, "and", and>;
2801defm VOR   : sse12_fp_packed_logical_y<0x56, "or", or>;
2802defm VXOR  : sse12_fp_packed_logical_y<0x57, "xor", xor>;
2803defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>;
2804
2805defm AND  : sse12_fp_packed_logical<0x54, "and", and>;
2806defm OR   : sse12_fp_packed_logical<0x56, "or", or>;
2807defm XOR  : sse12_fp_packed_logical<0x57, "xor", xor>;
2808let isCommutable = 0 in
2809  defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;
2810
2811//===----------------------------------------------------------------------===//
2812// SSE 1 & 2 - Arithmetic Instructions
2813//===----------------------------------------------------------------------===//
2814
2815/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
2816/// vector forms.
2817///
2818/// In addition, we also have a special variant of the scalar form here to
2819/// represent the associated intrinsic operation.  This form is unlike the
2820/// plain scalar form, in that it takes an entire vector (instead of a scalar)
2821/// and leaves the top elements unmodified (therefore these cannot be commuted).
2822///
2823/// These three forms can each be reg+reg or reg+mem.
2824///
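/// Note (illustration): the intrinsic scalar variant operates on whole vectors,
/// matching calls such as int_x86_sse_add_ss, where only element 0 of the
/// result is the sum and elements 1-3 pass through from the first operand (the
/// _mm_add_ss semantics); the plain scalar variant instead matches
/// (fadd FR32:$a, FR32:$b) on bare f32/f64 values.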
2825
2826/// FIXME: once all 256-bit intrinsics are matched, clean up and refactor the
2827/// classes below.
2828multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
2829                                  SizeItins itins,
2830                                  bit Is2Addr = 1> {
2831  defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
2832                            OpNode, FR32, f32mem,
2833                            itins.s, Is2Addr>, XS;
2834  defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
2835                            OpNode, FR64, f64mem,
2836                            itins.d, Is2Addr>, XD;
2837}
2838
2839multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
2840                                   SizeItins itins,
2841                                   bit Is2Addr = 1> {
2842  let mayLoad = 0 in {
2843  defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
2844              v4f32, f128mem, memopv4f32, SSEPackedSingle, itins.s, Is2Addr>,
2845              TB;
2846  defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
2847              v2f64, f128mem, memopv2f64, SSEPackedDouble, itins.d, Is2Addr>,
2848              TB, OpSize;
2849  }
2850}
2851
2852multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
2853                                    SDNode OpNode,
2854                                    SizeItins itins> {
2855  let mayLoad = 0 in {
2856    defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
2857                v8f32, f256mem, memopv8f32, SSEPackedSingle, itins.s, 0>,
2858                TB;
2859    defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
2860                v4f64, f256mem, memopv4f64, SSEPackedDouble, itins.d, 0>,
2861                TB, OpSize;
2862  }
2863}
2864
2865multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
2866                                      SizeItins itins,
2867                                      bit Is2Addr = 1> {
2868  defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
2869     !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
2870     itins.s, Is2Addr>, XS;
2871  defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
2872     !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
2873     itins.d, Is2Addr>, XD;
2874}
2875
2876multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
2877                                      SizeItins itins,
2878                                      bit Is2Addr = 1> {
2879  defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
2880     !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
2881                              SSEPackedSingle, itins.s, Is2Addr>,
2882                              TB;
2883
2884  defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
2885     !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
2886                              SSEPackedDouble, itins.d, Is2Addr>,
2887                              TB, OpSize;
2888}
2889
2890multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr,
2891                                        SizeItins itins> {
2892  defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
2893     !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
2894      SSEPackedSingle, itins.s, 0>, TB;
2895
2896  defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
2897     !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
2898      SSEPackedDouble, itins.d, 0>, TB, OpSize;
2899}
2900
2901// Binary Arithmetic instructions
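// Note (illustration): defm names concatenate with the def names inside the
// multiclasses, so "defm VADD : basic_sse12_fp_binop_s<...>" below yields
// VADDSSrr/VADDSSrm and VADDSDrr/VADDSDrm, while the packed forms yield
// VADDPSrr, VADDPDrm, VADDPSYrr, and so on.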
2902defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S, 0>,
2903            basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S, 0>,
2904              VEX_4V, VEX_LIG;
2905defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P, 0>,
2906            basic_sse12_fp_binop_p_y<0x58, "add", fadd, SSE_ALU_ITINS_P>,
2907              VEX_4V;
2908defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S, 0>,
2909            basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S, 0>,
2910              VEX_4V, VEX_LIG;
2911defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P, 0>,
2912            basic_sse12_fp_binop_p_y<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
2913              VEX_4V;
2914
2915let isCommutable = 0 in {
2916  defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S, 0>,
2917              basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>,
2918                VEX_4V, VEX_LIG;
2919  defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>,
2920              basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, VEX_4V;
2921  defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>,
2922              basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>,
2923                VEX_4V, VEX_LIG;
2924  defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P, 0>,
2925              basic_sse12_fp_binop_p_y<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
2926                VEX_4V;
2927  defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S, 0>,
2928              basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S, 0>,
2929                VEX_4V, VEX_LIG;
2930  defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P, 0>,
2931              basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P, 0>,
2932              basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
2933              basic_sse12_fp_binop_p_y_int<0x5F, "max", SSE_ALU_ITINS_P>,
2934                VEX_4V;
2935  defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S, 0>,
2936              basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S, 0>,
2937                VEX_4V, VEX_LIG;
2938  defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P, 0>,
2939              basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P, 0>,
2940              basic_sse12_fp_binop_p_y_int<0x5D, "min", SSE_ALU_ITINS_P>,
2941              basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
2942                VEX_4V;
2943}
2944
2945let Constraints = "$src1 = $dst" in {
2946  defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
2947             basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
2948             basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
2949  defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
2950             basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
2951             basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
2952
2953  let isCommutable = 0 in {
2954    defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
2955               basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
2956               basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
2957    defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
2958               basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
2959               basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
2960    defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
2961               basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
2962               basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>,
2963               basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P>;
2964    defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
2965               basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
2966               basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>,
2967               basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P>;
2968  }
2969}
2970
2971/// Unop Arithmetic: SSE 1 & 2 unary ops (sqrt, rsqrt, rcp) come in both scalar and vector forms.
2972/// In addition, we also have a special variant of the scalar form here to
2973/// represent the associated intrinsic operation.  This form is unlike the
2974/// plain scalar form, in that it takes an entire vector (instead of a
2975/// scalar) and leaves the top elements undefined.
2976///
2977/// And, we have a special variant form for a full-vector intrinsic form.
2978
2979def SSE_SQRTP : OpndItins<
2980  IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM
2981>;
2982
2983def SSE_SQRTS : OpndItins<
2984  IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM
2985>;
2986
2987def SSE_RCPP : OpndItins<
2988  IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
2989>;
2990
2991def SSE_RCPS : OpndItins<
2992  IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM
2993>;
2994
2995/// sse1_fp_unop_s - SSE1 unops in scalar form.
2996multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
2997                          SDNode OpNode, Intrinsic F32Int, OpndItins itins> {
2998  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
2999                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
3000                [(set FR32:$dst, (OpNode FR32:$src))]>;
3001  // For scalar unary operations, fold a load into the operation
3002  // only in OptForSize mode. Folding eliminates an instruction, but it also
3003  // eliminates the whole-register write the load would have performed, so it
3004  // introduces a partial register update (and a potential false dependency).
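  // For example (sketch): folding gives "sqrtss (%rax), %xmm0", which writes
  // only the low 32 bits of %xmm0 and so depends on the register's previous
  // contents; the unfolded "movss (%rax), %xmm0; sqrtss %xmm0, %xmm0" starts
  // with a full-register write (movss from memory zeroes the upper bits) and
  // has no such dependency.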
3005  def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
3006                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
3007                [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
3008            Requires<[HasSSE1, OptForSize]>;
3009  def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3010                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
3011                    [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>;
3012  def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
3013                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
3014                    [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>;
3015}
3016
3017/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
3018multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
3019  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
3020                !strconcat(OpcodeStr,
3021                           "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
3022  let mayLoad = 1 in
3023  def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2),
3024                !strconcat(OpcodeStr,
3025                           "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
3026  def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
3027                (ins VR128:$src1, ssmem:$src2),
3028                !strconcat(OpcodeStr,
3029                           "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
3030}
3031
3032/// sse1_fp_unop_p - SSE1 unops in packed form.
3033multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
3034                          OpndItins itins> {
3035  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3036              !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
3037              [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>;
3038  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
3039                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
3040                [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>;
3041}
3042
3043/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form.
3044multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
3045                            OpndItins itins> {
3046  def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
3047              !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
3048              [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
3049              itins.rr>;
3050  def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
3051                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
3052                [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
3053                itins.rm>;
3054}
3055
3056/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
3057multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
3058                              Intrinsic V4F32Int, OpndItins itins> {
3059  def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3060                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
3061                    [(set VR128:$dst, (V4F32Int VR128:$src))],
3062                    itins.rr>;
3063  def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
3064                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
3065                    [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
3066                    itins.rm>;
3067}
3068
3069/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
3070multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
3071                                Intrinsic V4F32Int, OpndItins itins> {
3072  def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
3073                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
3074                    [(set VR256:$dst, (V4F32Int VR256:$src))],
3075                    itins.rr>;
3076  def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
3077                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
3078                    [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))],
3079                    itins.rm>;
3080}
3081
3082/// sse2_fp_unop_s - SSE2 unops in scalar form.
3083multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
3084                          SDNode OpNode, Intrinsic F64Int, OpndItins itins> {
3085  def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
3086                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
3087                [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>;
3088  // See the comments in sse1_fp_unop_s for why this is OptForSize.
3089  def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
3090                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
3091                [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
3092            Requires<[HasSSE2, OptForSize]>;
3093  def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3094                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
3095                    [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>;
3096  def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
3097                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
3098                    [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>;
3099}
3100
3101/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
3102multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
3103  let neverHasSideEffects = 1 in {
3104  def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
3105               !strconcat(OpcodeStr,
3106                          "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
3107  let mayLoad = 1 in
3108  def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2),
3109               !strconcat(OpcodeStr,
3110                          "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
3111  }
3112  def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
3113               (ins VR128:$src1, sdmem:$src2),
3114               !strconcat(OpcodeStr,
3115                          "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
3116}
3117
3118/// sse2_fp_unop_p - SSE2 unops in vector forms.
3119multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
3120                          SDNode OpNode, OpndItins itins> {
3121  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3122              !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
3123              [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>;
3124  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
3125                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
3126                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>;
3127}
3128
3129/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms.
3130multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
3131                          OpndItins itins> {
3132  def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
3133              !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
3134              [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
3135              itins.rr>;
3136  def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
3137                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
3138                [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
3139                itins.rm>;
3140}
3141
3142/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
3143multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
3144                              Intrinsic V2F64Int, OpndItins itins> {
3145  def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3146                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
3147                    [(set VR128:$dst, (V2F64Int VR128:$src))],
3148                    itins.rr>;
3149  def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
3150                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
3151                    [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))],
3152                    itins.rm>;
3153}
3154
3155/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
3156multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
3157                                Intrinsic V2F64Int, OpndItins itins> {
3158  def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
3159                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
3160                    [(set VR256:$dst, (V2F64Int VR256:$src))],
3161                    itins.rr>;
3162  def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
3163                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
3164                    [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))],
3165                    itins.rm>;
3166}
3167
3168let Predicates = [HasAVX] in {
3169  // Square root.
3170  defm VSQRT  : sse1_fp_unop_s_avx<0x51, "vsqrt">,
3171                sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG;
3172
3173  defm VSQRT  : sse1_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
3174                sse2_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
3175                sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
3176                sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
3177                sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps,
3178                                   SSE_SQRTP>,
3179                sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd,
3180                                    SSE_SQRTP>,
3181                sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256,
3182                                    SSE_SQRTP>,
3183                sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256,
3184                                    SSE_SQRTP>,
3185                VEX;
3186
3187  // Reciprocal approximations. Note that these typically require refinement
3188  // in order to obtain suitable precision.
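  // A refinement sketch (not emitted here): one Newton-Raphson step for rcpps
  // is x1 = x0*(2 - a*x0) and for rsqrtps x1 = 0.5*x0*(3 - a*x0*x0), roughly
  // doubling the ~12 bits of precision the hardware approximations provide.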
3189  defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG;
3190  defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
3191                sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
3192                sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256,
3193                                    SSE_SQRTP>,
3194                sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps,
3195                                    SSE_SQRTP>, VEX;
3196
3197  defm VRCP   : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG;
3198  defm VRCP   : sse1_fp_unop_p<0x53, "vrcp", X86frcp, SSE_RCPP>,
3199                sse1_fp_unop_p_y<0x53, "vrcp", X86frcp, SSE_RCPP>,
3200                sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256,
3201                                    SSE_RCPP>,
3202                sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps,
3203                                    SSE_RCPP>, VEX;
3204}
3205
3206let AddedComplexity = 1 in {
3207def : Pat<(f32 (fsqrt FR32:$src)),
3208          (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
3209def : Pat<(f32 (fsqrt (load addr:$src))),
3210          (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
3211          Requires<[HasAVX, OptForSize]>;
3212def : Pat<(f64 (fsqrt FR64:$src)),
3213          (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
3214def : Pat<(f64 (fsqrt (load addr:$src))),
3215          (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
3216          Requires<[HasAVX, OptForSize]>;
3217
3218def : Pat<(f32 (X86frsqrt FR32:$src)),
3219          (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
3220def : Pat<(f32 (X86frsqrt (load addr:$src))),
3221          (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
3222          Requires<[HasAVX, OptForSize]>;
3223
3224def : Pat<(f32 (X86frcp FR32:$src)),
3225          (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
3226def : Pat<(f32 (X86frcp (load addr:$src))),
3227          (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
3228          Requires<[HasAVX, OptForSize]>;
3229}
3230
3231let Predicates = [HasAVX], AddedComplexity = 1 in {
3232  def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
3233            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
3234                (VSQRTSSr (f32 (IMPLICIT_DEF)),
3235                          (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
3236                sub_ss)>;
3237  def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
3238            (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
3239
3240  def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
3241            (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
3242                (VSQRTSDr (f64 (IMPLICIT_DEF)),
3243                          (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd)),
3244                sub_sd)>;
3245  def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
3246            (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
3247
3248  def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
3249            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
3250                (VRSQRTSSr (f32 (IMPLICIT_DEF)),
3251                          (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
3252                sub_ss)>;
3253  def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
3254            (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
3255
3256  def : Pat<(int_x86_sse_rcp_ss VR128:$src),
3257            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
3258                (VRCPSSr (f32 (IMPLICIT_DEF)),
3259                         (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)),
3260                sub_ss)>;
3261  def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
3262            (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
3263}
3264
3265// Square root.
3266defm SQRT  : sse1_fp_unop_s<0x51, "sqrt",  fsqrt, int_x86_sse_sqrt_ss,
3267                            SSE_SQRTS>,
3268             sse1_fp_unop_p<0x51, "sqrt",  fsqrt, SSE_SQRTS>,
3269             sse1_fp_unop_p_int<0x51, "sqrt",  int_x86_sse_sqrt_ps, SSE_SQRTS>,
3270             sse2_fp_unop_s<0x51, "sqrt",  fsqrt, int_x86_sse2_sqrt_sd,
3271                            SSE_SQRTS>,
3272             sse2_fp_unop_p<0x51, "sqrt",  fsqrt, SSE_SQRTS>,
3273             sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTS>;
3274
3275// Reciprocal approximations. Note that these typically require refinement
3276// in order to obtain suitable precision.
3277defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss,
3278                            SSE_SQRTS>,
3279             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
3280             sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
3281                            SSE_SQRTS>;
3282defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss,
3283                            SSE_RCPS>,
3284             sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPS>,
3285             sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPS>;
3286
3287// There is no f64 version of the reciprocal approximation instructions.
3288
3289//===----------------------------------------------------------------------===//
3290// SSE 1 & 2 - Non-temporal stores
3291//===----------------------------------------------------------------------===//
3292
3293let AddedComplexity = 400 in { // Prefer non-temporal versions
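  // The large AddedComplexity makes instruction selection try these patterns
  // before the plain store patterns, which would otherwise also match a
  // non-temporal store node.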
3294  def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
3295                       (ins f128mem:$dst, VR128:$src),
3296                       "movntps\t{$src, $dst|$dst, $src}",
3297                       [(alignednontemporalstore (v4f32 VR128:$src),
3298                                                 addr:$dst)],
3299                                                 IIC_SSE_MOVNT>, VEX;
3300  def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
3301                       (ins f128mem:$dst, VR128:$src),
3302                       "movntpd\t{$src, $dst|$dst, $src}",
3303                       [(alignednontemporalstore (v2f64 VR128:$src),
3304                                                 addr:$dst)],
3305                                                 IIC_SSE_MOVNT>, VEX;
3306
3307  let ExeDomain = SSEPackedInt in
3308  def VMOVNTDQmr    : VPDI<0xE7, MRMDestMem, (outs),
3309                           (ins f128mem:$dst, VR128:$src),
3310                           "movntdq\t{$src, $dst|$dst, $src}",
3311                           [(alignednontemporalstore (v2i64 VR128:$src),
3312                                                     addr:$dst)],
3313                                                     IIC_SSE_MOVNT>, VEX;
3314
3315  def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
3316            (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
3317
3318  def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
3319                       (ins f256mem:$dst, VR256:$src),
3320                       "movntps\t{$src, $dst|$dst, $src}",
3321                       [(alignednontemporalstore (v8f32 VR256:$src),
3322                                                 addr:$dst)],
3323                                                 IIC_SSE_MOVNT>, VEX;
3324  def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
3325                       (ins f256mem:$dst, VR256:$src),
3326                       "movntpd\t{$src, $dst|$dst, $src}",
3327                       [(alignednontemporalstore (v4f64 VR256:$src),
3328                                                 addr:$dst)],
3329                                                 IIC_SSE_MOVNT>, VEX;
3330  let ExeDomain = SSEPackedInt in
3331  def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
3332                      (ins f256mem:$dst, VR256:$src),
3333                      "movntdq\t{$src, $dst|$dst, $src}",
3334                      [(alignednontemporalstore (v4i64 VR256:$src),
3335                                                addr:$dst)],
3336                                                IIC_SSE_MOVNT>, VEX;
3337}
3338
3339def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
3340          (VMOVNTDQYmr addr:$dst, VR256:$src)>;
3341def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src),
3342          (VMOVNTPDYmr addr:$dst, VR256:$src)>;
3343def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
3344          (VMOVNTPSYmr addr:$dst, VR256:$src)>;
3345
3346let AddedComplexity = 400 in { // Prefer non-temporal versions
3347def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
3348                    "movntps\t{$src, $dst|$dst, $src}",
3349                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
3350                    IIC_SSE_MOVNT>;
3351def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
3352                    "movntpd\t{$src, $dst|$dst, $src}",
3353                    [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)],
3354                    IIC_SSE_MOVNT>;
3355
3356let ExeDomain = SSEPackedInt in
3357def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
3358                    "movntdq\t{$src, $dst|$dst, $src}",
3359                    [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
3360                    IIC_SSE_MOVNT>;
3361
3362def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
3363          (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
3364
3365// There is no AVX form for instructions below this point
3366def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
3367                 "movnti{l}\t{$src, $dst|$dst, $src}",
3368                 [(nontemporalstore (i32 GR32:$src), addr:$dst)],
3369                 IIC_SSE_MOVNT>,
3370               TB, Requires<[HasSSE2]>;
3371def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
3372                     "movnti{q}\t{$src, $dst|$dst, $src}",
3373                     [(nontemporalstore (i64 GR64:$src), addr:$dst)],
3374                     IIC_SSE_MOVNT>,
3375                  TB, Requires<[HasSSE2]>;
3376}
3377
3378//===----------------------------------------------------------------------===//
3379// SSE 1 & 2 - Prefetch and memory fence
3380//===----------------------------------------------------------------------===//
3381
3382// Prefetch intrinsic.
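// The llvm.prefetch locality operand selects the hint (3 -> prefetcht0,
// 2 -> prefetcht1, 1 -> prefetcht2, 0 -> prefetchnta); the trailing (i32 1)
// operand restricts these patterns to data-cache prefetches.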
3383let Predicates = [HasSSE1] in {
3384def PREFETCHT0   : I<0x18, MRM1m, (outs), (ins i8mem:$src),
3385    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
3386    IIC_SSE_PREFETCH>, TB;
3387def PREFETCHT1   : I<0x18, MRM2m, (outs), (ins i8mem:$src),
3388    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))],
3389    IIC_SSE_PREFETCH>, TB;
3390def PREFETCHT2   : I<0x18, MRM3m, (outs), (ins i8mem:$src),
3391    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))],
3392    IIC_SSE_PREFETCH>, TB;
3393def PREFETCHNTA  : I<0x18, MRM0m, (outs), (ins i8mem:$src),
3394    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))],
3395    IIC_SSE_PREFETCH>, TB;
3396}
3397
3398// Flush cache
3399def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
3400               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)],
3401               IIC_SSE_PREFETCH>, TB, Requires<[HasSSE2]>;
3402
3403// Pause. This "instruction" is encoded as "rep; nop", so even though it
3404// was introduced with SSE2, it's backward compatible.
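// The raw encoding is the REP prefix (0xF3) followed by NOP (0x90), which
// pre-SSE2 processors simply execute as a NOP.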
3405def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", [], IIC_SSE_PAUSE>, REP;
3406
3407// Load, store, and memory fence
3408def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
3409               "sfence", [(int_x86_sse_sfence)], IIC_SSE_SFENCE>,
3410               TB, Requires<[HasSSE1]>;
3411def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
3412               "lfence", [(int_x86_sse2_lfence)], IIC_SSE_LFENCE>,
3413               TB, Requires<[HasSSE2]>;
3414def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
3415               "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
3416               TB, Requires<[HasSSE2]>;
3417
3418def : Pat<(X86SFence), (SFENCE)>;
3419def : Pat<(X86LFence), (LFENCE)>;
3420def : Pat<(X86MFence), (MFENCE)>;
3421
3422//===----------------------------------------------------------------------===//
3423// SSE 1 & 2 - Load/Store MXCSR register
3424//===----------------------------------------------------------------------===//
3425
3426def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
3427                  "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
3428                  IIC_SSE_LDMXCSR>, VEX;
3429def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
3430                  "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
3431                  IIC_SSE_STMXCSR>, VEX;
3432
3433def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
3434                  "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
3435                  IIC_SSE_LDMXCSR>;
3436def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
3437                  "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
3438                  IIC_SSE_STMXCSR>;
3439
3440//===---------------------------------------------------------------------===//
3441// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
3442//===---------------------------------------------------------------------===//
3443
3444let ExeDomain = SSEPackedInt in { // SSE integer instructions
3445
3446let neverHasSideEffects = 1 in {
3447def VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3448                    "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
3449                    VEX;
3450def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
3451                    "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
3452                    VEX;
3453}
3454def VMOVDQUrr  : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3455                    "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
3456                    VEX;
3457def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
3458                    "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
3459                    VEX;
3460
3461// For Disassembler
3462let isCodeGenOnly = 1 in {
3463def VMOVDQArr_REV  : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
3464                        "movdqa\t{$src, $dst|$dst, $src}", [],
3465                        IIC_SSE_MOVA_P_RR>,
3466                        VEX;
3467def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
3468                        "movdqa\t{$src, $dst|$dst, $src}", [],
3469                        IIC_SSE_MOVA_P_RR>,
3470                        VEX;
3471def VMOVDQUrr_REV  : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
3472                        "movdqu\t{$src, $dst|$dst, $src}", [],
3473                        IIC_SSE_MOVU_P_RR>,
3474                        VEX;
3475def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
3476                        "movdqu\t{$src, $dst|$dst, $src}", [],
3477                        IIC_SSE_MOVU_P_RR>,
3478                        VEX;
3479}
3480
3481let canFoldAsLoad = 1, mayLoad = 1 in {
3482def VMOVDQArm  : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
3483                   "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
3484                   VEX;
3485def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
3486                   "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
3487                   VEX;
3488let Predicates = [HasAVX] in {
3489  def VMOVDQUrm  : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
3490                    "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
3491                    XS, VEX;
3492  def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
3493                    "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
3494                    XS, VEX;
3495}
3496}
3497
3498let mayStore = 1 in {
3499def VMOVDQAmr  : VPDI<0x7F, MRMDestMem, (outs),
3500                     (ins i128mem:$dst, VR128:$src),
3501                     "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
3502                     VEX;
3503def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
3504                     (ins i256mem:$dst, VR256:$src),
3505                     "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
3506                     VEX;
3507let Predicates = [HasAVX] in {
3508def VMOVDQUmr  : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
3509                  "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
3510                  XS, VEX;
3511def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
3512                  "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
3513                  XS, VEX;
3514}
3515}
3516
3517let neverHasSideEffects = 1 in
3518def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3519                   "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;
3520
3521def MOVDQUrr :   I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
3522                   "movdqu\t{$src, $dst|$dst, $src}",
3523                   [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>;
3524
3525// For Disassembler
3526let isCodeGenOnly = 1 in {
3527def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
3528                       "movdqa\t{$src, $dst|$dst, $src}", [],
3529                       IIC_SSE_MOVA_P_RR>;
3530
3531def MOVDQUrr_REV :   I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
3532                       "movdqu\t{$src, $dst|$dst, $src}",
3533                       [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>;
3534}
3535
3536let canFoldAsLoad = 1, mayLoad = 1 in {
3537def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
3538                   "movdqa\t{$src, $dst|$dst, $src}",
3539                   [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
3540                   IIC_SSE_MOVA_P_RM>;
3541def MOVDQUrm :   I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
3542                   "movdqu\t{$src, $dst|$dst, $src}",
3543                   [/*(set VR128:$dst, (loadv2i64 addr:$src))*/],
3544                   IIC_SSE_MOVU_P_RM>,
3545                 XS, Requires<[HasSSE2]>;
3546}
3547
3548let mayStore = 1 in {
3549def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
3550                   "movdqa\t{$src, $dst|$dst, $src}",
3551                   [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
3552                   IIC_SSE_MOVA_P_MR>;
3553def MOVDQUmr :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
3554                   "movdqu\t{$src, $dst|$dst, $src}",
3555                   [/*(store (v2i64 VR128:$src), addr:$dst)*/],
3556                   IIC_SSE_MOVU_P_MR>,
3557                 XS, Requires<[HasSSE2]>;
3558}
3559
3560// Intrinsic forms of the MOVDQU store
3561def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
3562                       "vmovdqu\t{$src, $dst|$dst, $src}",
3563                       [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
3564                       IIC_SSE_MOVU_P_MR>,
3565                     XS, VEX, Requires<[HasAVX]>;
3566
3567def MOVDQUmr_Int :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
3568                       "movdqu\t{$src, $dst|$dst, $src}",
3569                       [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
3570                       IIC_SSE_MOVU_P_MR>,
3571                     XS, Requires<[HasSSE2]>;
3572
3573} // ExeDomain = SSEPackedInt
3574
3575let Predicates = [HasAVX] in {
3576  def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
3577            (VMOVDQUYmr addr:$dst, VR256:$src)>;
3578}
3579
3580//===---------------------------------------------------------------------===//
3581// SSE2 - Packed Integer Arithmetic Instructions
3582//===---------------------------------------------------------------------===//
3583
3584def SSE_PMADD : OpndItins<
3585  IIC_SSE_PMADD, IIC_SSE_PMADD
3586>;
3587
3588let ExeDomain = SSEPackedInt in { // SSE integer instructions
3589
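/// PDI_binop_rm_int - SSE2 intrinsic binary operator with register/register
/// and register/memory forms.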
3590multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
3591                            RegisterClass RC, PatFrag memop_frag,
3592                            X86MemOperand x86memop,
3593                            OpndItins itins,
3594                            bit IsCommutable = 0,
3595                            bit Is2Addr = 1> {
3596  let isCommutable = IsCommutable in
3597  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
3598       (ins RC:$src1, RC:$src2),
3599       !if(Is2Addr,
3600           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3601           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3602       [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>;
3603  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
3604       (ins RC:$src1, x86memop:$src2),
3605       !if(Is2Addr,
3606           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3607           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3608       [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
3609       itins.rm>;
3610}
3611
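/// PDI_binop_rmi - SSE2 vector shift; OpNode shifts by an XMM-register (or
/// 128-bit memory) count and OpNode2 shifts by an immediate count.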
3612multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
3613                         string OpcodeStr, SDNode OpNode,
3614                         SDNode OpNode2, RegisterClass RC,
3615                         ValueType DstVT, ValueType SrcVT, PatFrag bc_frag,
3616                         ShiftOpndItins itins,
3617                         bit Is2Addr = 1> {
3618  // src2 (the shift count) is always 128-bit, even for the 256-bit forms
3619  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
3620       (ins RC:$src1, VR128:$src2),
3621       !if(Is2Addr,
3622           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3623           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3624       [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
3625        itins.rr>;
3626  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
3627       (ins RC:$src1, i128mem:$src2),
3628       !if(Is2Addr,
3629           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3630           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3631       [(set RC:$dst, (DstVT (OpNode RC:$src1,
3632                       (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>;
3633  def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
3634       (ins RC:$src1, i32i8imm:$src2),
3635       !if(Is2Addr,
3636           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3637           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3638       [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>;
3639}
3640
3641/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types.
3642multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
3643                         ValueType DstVT, ValueType SrcVT, RegisterClass RC,
3644                         PatFrag memop_frag, X86MemOperand x86memop,
3645                         OpndItins itins,
3646                         bit IsCommutable = 0, bit Is2Addr = 1> {
3647  let isCommutable = IsCommutable in
3648  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
3649       (ins RC:$src1, RC:$src2),
3650       !if(Is2Addr,
3651           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3652           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3653       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>;
3654  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
3655       (ins RC:$src1, x86memop:$src2),
3656       !if(Is2Addr,
3657           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
3658           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
3659       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
3660                                     (bitconvert (memop_frag addr:$src2)))))]>;
3661}
3662} // ExeDomain = SSEPackedInt
3663
3664// 128-bit Integer Arithmetic
3665
3666let Predicates = [HasAVX] in {
3667defm VPADDB  : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, VR128, memopv2i64,
3668                            i128mem, SSE_INTALU_ITINS_P, 1, 0 /*3addr*/>,
3669                            VEX_4V;
3670defm VPADDW  : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64,
3671                            i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3672defm VPADDD  : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64,
3673                            i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3674defm VPADDQ  : PDI_binop_rm<0xD4, "vpaddq", add, v2i64, VR128, memopv2i64,
3675                            i128mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V;
3676defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64,
3677                            i128mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
3678defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64,
3679                            i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
3680defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64,
3681                            i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
3682defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64,
3683                            i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
3684defm VPSUBQ : PDI_binop_rm<0xFB, "vpsubq", sub, v2i64, VR128, memopv2i64,
3685                            i128mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V;
3686defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
3687                              memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
3688                              VEX_4V;
3689
3690// Intrinsic forms
3691defm VPSUBSB  : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b,
3692                                 VR128, memopv2i64, i128mem,
3693                                 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
3694defm VPSUBSW  : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w,
3695                                 VR128, memopv2i64, i128mem,
3696                                 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
3697defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b,
3698                                 VR128, memopv2i64, i128mem,
3699                                 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
3700defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w,
3701                                 VR128, memopv2i64, i128mem,
3702                                 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
3703defm VPADDSB  : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b,
3704                                 VR128, memopv2i64, i128mem,
3705                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3706defm VPADDSW  : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w,
3707                                 VR128, memopv2i64, i128mem,
3708                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3709defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b,
3710                                 VR128, memopv2i64, i128mem,
3711                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3712defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w,
3713                                 VR128, memopv2i64, i128mem,
3714                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3715defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w,
3716                                 VR128, memopv2i64, i128mem,
3717                                 SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
3718defm VPMULHW  : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w,
3719                                 VR128, memopv2i64, i128mem,
3720                                 SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
3721defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd,
3722                                 VR128, memopv2i64, i128mem,
3723                                 SSE_PMADD, 1, 0>, VEX_4V;
3724defm VPAVGB   : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b,
3725                                 VR128, memopv2i64, i128mem,
3726                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3727defm VPAVGW   : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w,
3728                                 VR128, memopv2i64, i128mem,
3729                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3730defm VPMINUB  : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b,
3731                                 VR128, memopv2i64, i128mem,
3732                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3733defm VPMINSW  : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w,
3734                                 VR128, memopv2i64, i128mem,
3735                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3736defm VPMAXUB  : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b,
3737                                 VR128, memopv2i64, i128mem,
3738                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3739defm VPMAXSW  : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w,
3740                                 VR128, memopv2i64, i128mem,
3741                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3742defm VPSADBW  : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw,
3743                                 VR128, memopv2i64, i128mem,
3744                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3745}
3746
3747let Predicates = [HasAVX2] in {
3748defm VPADDBY  : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64,
3749                             i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3750defm VPADDWY  : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64,
3751                             i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3752defm VPADDDY  : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64,
3753                             i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3754defm VPADDQY  : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64,
3755                             i256mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V;
3756defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64,
3757                             i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
3758defm VPSUBBY  : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64,
3759                             i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
3760defm VPSUBWY  : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64,
3761                             i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
3762defm VPSUBDY  : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64,
3763                             i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
3764defm VPSUBQY  : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64,
3765                             i256mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V;
3766defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
3767                               VR256, memopv4i64, i256mem,
3768                               SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
3769
3770// Intrinsic forms
3771defm VPSUBSBY  : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b,
3772                                  VR256, memopv4i64, i256mem,
3773                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
3774defm VPSUBSWY  : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w,
3775                                  VR256, memopv4i64, i256mem,
3776                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
3777defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b,
3778                                  VR256, memopv4i64, i256mem,
3779                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
3780defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w,
3781                                  VR256, memopv4i64, i256mem,
3782                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
3783defm VPADDSBY  : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b,
3784                                  VR256, memopv4i64, i256mem,
3785                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3786defm VPADDSWY  : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w,
3787                                  VR256, memopv4i64, i256mem,
3788                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3789defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b,
3790                                  VR256, memopv4i64, i256mem,
3791                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3792defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w,
3793                                  VR256, memopv4i64, i256mem,
3794                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3795defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w,
3796                                  VR256, memopv4i64, i256mem,
3797                                  SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
3798defm VPMULHWY  : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w,
3799                                  VR256, memopv4i64, i256mem,
3800                                  SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
3801defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd,
3802                                  VR256, memopv4i64, i256mem,
3803                                  SSE_PMADD, 1, 0>, VEX_4V;
3804defm VPAVGBY   : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b,
3805                                  VR256, memopv4i64, i256mem,
3806                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3807defm VPAVGWY   : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w,
3808                                  VR256, memopv4i64, i256mem,
3809                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3810defm VPMINUBY  : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b,
3811                                  VR256, memopv4i64, i256mem,
3812                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3813defm VPMINSWY  : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w,
3814                                  VR256, memopv4i64, i256mem,
3815                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3816defm VPMAXUBY  : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b,
3817                                  VR256, memopv4i64, i256mem,
3818                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3819defm VPMAXSWY  : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w,
3820                                  VR256, memopv4i64, i256mem,
3821                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3822defm VPSADBWY  : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw,
3823                                  VR256, memopv4i64, i256mem,
3824                                  SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
3825}
3826
3827let Constraints = "$src1 = $dst" in {
3828defm PADDB  : PDI_binop_rm<0xFC, "paddb", add, v16i8, VR128, memopv2i64,
3829                           i128mem, SSE_INTALU_ITINS_P, 1>;
3830defm PADDW  : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64,
3831                           i128mem, SSE_INTALU_ITINS_P, 1>;
3832defm PADDD  : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64,
3833                           i128mem, SSE_INTALU_ITINS_P, 1>;
3834defm PADDQ  : PDI_binop_rm<0xD4, "paddq", add, v2i64, VR128, memopv2i64,
3835                           i128mem, SSE_INTALUQ_ITINS_P, 1>;
3836defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64,
3837                           i128mem, SSE_INTMUL_ITINS_P, 1>;
3838defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64,
3839                          i128mem, SSE_INTALU_ITINS_P>;
3840defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64,
3841                          i128mem, SSE_INTALU_ITINS_P>;
3842defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64,
3843                          i128mem, SSE_INTALU_ITINS_P>;
3844defm PSUBQ : PDI_binop_rm<0xFB, "psubq", sub, v2i64, VR128, memopv2i64,
3845                          i128mem, SSE_INTALUQ_ITINS_P>;
3846defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
3847                             memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>;
3848
3849// Intrinsic forms
3850defm PSUBSB  : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b,
3851                                VR128, memopv2i64, i128mem,
3852                                SSE_INTALU_ITINS_P>;
3853defm PSUBSW  : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w,
3854                                VR128, memopv2i64, i128mem,
3855                                SSE_INTALU_ITINS_P>;
3856defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b,
3857                                VR128, memopv2i64, i128mem,
3858                                SSE_INTALU_ITINS_P>;
3859defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w,
3860                                VR128, memopv2i64, i128mem,
3861                                SSE_INTALU_ITINS_P>;
3862defm PADDSB  : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b,
3863                                VR128, memopv2i64, i128mem,
3864                                SSE_INTALU_ITINS_P, 1>;
3865defm PADDSW  : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w,
3866                                VR128, memopv2i64, i128mem,
3867                                SSE_INTALU_ITINS_P, 1>;
3868defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
3869                                VR128, memopv2i64, i128mem,
3870                                SSE_INTALU_ITINS_P, 1>;
3871defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
3872                                VR128, memopv2i64, i128mem,
3873                                SSE_INTALU_ITINS_P, 1>;
3874defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w,
3875                                VR128, memopv2i64, i128mem,
3876                                SSE_INTMUL_ITINS_P, 1>;
3877defm PMULHW  : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w,
3878                                VR128, memopv2i64, i128mem,
3879                                SSE_INTMUL_ITINS_P, 1>;
3880defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
3881                                VR128, memopv2i64, i128mem,
3882                                SSE_PMADD, 1>;
3883defm PAVGB   : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
3884                                VR128, memopv2i64, i128mem,
3885                                SSE_INTALU_ITINS_P, 1>;
3886defm PAVGW   : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
3887                                VR128, memopv2i64, i128mem,
3888                                SSE_INTALU_ITINS_P, 1>;
3889defm PMINUB  : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b,
3890                                VR128, memopv2i64, i128mem,
3891                                SSE_INTALU_ITINS_P, 1>;
3892defm PMINSW  : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w,
3893                                VR128, memopv2i64, i128mem,
3894                                SSE_INTALU_ITINS_P, 1>;
3895defm PMAXUB  : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b,
3896                                VR128, memopv2i64, i128mem,
3897                                SSE_INTALU_ITINS_P, 1>;
3898defm PMAXSW  : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w,
3899                                VR128, memopv2i64, i128mem,
3900                                SSE_INTALU_ITINS_P, 1>;
3901defm PSADBW  : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
3902                                VR128, memopv2i64, i128mem,
3903                                SSE_INTALU_ITINS_P, 1>;
3904
3905} // Constraints = "$src1 = $dst"
3906
3907//===---------------------------------------------------------------------===//
3908// SSE2 - Packed Integer Logical Instructions
3909//===---------------------------------------------------------------------===//
3910
3911let Predicates = [HasAVX] in {
3912defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
3913                            VR128, v8i16, v8i16, bc_v8i16,
3914                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
3915defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
3916                            VR128, v4i32, v4i32, bc_v4i32,
3917                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
3918defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
3919                            VR128, v2i64, v2i64, bc_v2i64,
3920                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
3921
3922defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
3923                            VR128, v8i16, v8i16, bc_v8i16,
3924                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
3925defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
3926                            VR128, v4i32, v4i32, bc_v4i32,
3927                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
3928defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
3929                            VR128, v2i64, v2i64, bc_v2i64,
3930                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
3931
3932defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
3933                            VR128, v8i16, v8i16, bc_v8i16,
3934                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
3935defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
3936                            VR128, v4i32, v4i32, bc_v4i32,
3937                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
3938
3939let ExeDomain = SSEPackedInt in {
3940  // 128-bit logical shifts.
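  // PSLLDQ/PSRLDQ shift the whole register by a byte count, hence the
  // *_dq_bs ("byte shift") intrinsics.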
3941  def VPSLLDQri : PDIi8<0x73, MRM7r,
3942                    (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
3943                    "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3944                    [(set VR128:$dst,
3945                      (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>,
3946                    VEX_4V;
3947  def VPSRLDQri : PDIi8<0x73, MRM3r,
3948                    (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
3949                    "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3950                    [(set VR128:$dst,
3951                      (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>,
3952                    VEX_4V;
3953  // PSRADQri doesn't exist in SSE[1-3].
3954}
3955} // Predicates = [HasAVX]
3956
3957let Predicates = [HasAVX2] in {
3958defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
3959                             VR256, v16i16, v8i16, bc_v8i16,
3960                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
3961defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
3962                             VR256, v8i32, v4i32, bc_v4i32,
3963                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
3964defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
3965                             VR256, v4i64, v2i64, bc_v2i64,
3966                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
3967
3968defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
3969                             VR256, v16i16, v8i16, bc_v8i16,
3970                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
3971defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
3972                             VR256, v8i32, v4i32, bc_v4i32,
3973                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
3974defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
3975                             VR256, v4i64, v2i64, bc_v2i64,
3976                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
3977
3978defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
3979                             VR256, v16i16, v8i16, bc_v8i16,
3980                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
3981defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
3982                             VR256, v8i32, v4i32, bc_v4i32,
3983                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
3984
3985let ExeDomain = SSEPackedInt in {
3986  // 256-bit logical shifts.
3987  def VPSLLDQYri : PDIi8<0x73, MRM7r,
3988                    (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
3989                    "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3990                    [(set VR256:$dst,
3991                      (int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2))]>,
3992                    VEX_4V;
3993  def VPSRLDQYri : PDIi8<0x73, MRM3r,
3994                    (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
3995                    "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3996                    [(set VR256:$dst,
3997                      (int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2))]>,
3998                    VEX_4V;
3999  // PSRADQYri doesn't exist in SSE[1-3].
4000}
4001} // Predicates = [HasAVX2]
4002
4003let Constraints = "$src1 = $dst" in {
4004defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
4005                           VR128, v8i16, v8i16, bc_v8i16,
4006                           SSE_INTSHIFT_ITINS_P>;
4007defm PSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
4008                           VR128, v4i32, v4i32, bc_v4i32,
4009                           SSE_INTSHIFT_ITINS_P>;
4010defm PSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
4011                           VR128, v2i64, v2i64, bc_v2i64,
4012                           SSE_INTSHIFT_ITINS_P>;
4013
4014defm PSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
4015                           VR128, v8i16, v8i16, bc_v8i16,
4016                           SSE_INTSHIFT_ITINS_P>;
4017defm PSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
4018                           VR128, v4i32, v4i32, bc_v4i32,
4019                           SSE_INTSHIFT_ITINS_P>;
4020defm PSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
4021                           VR128, v2i64, v2i64, bc_v2i64,
4022                           SSE_INTSHIFT_ITINS_P>;
4023
4024defm PSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
4025                           VR128, v8i16, v8i16, bc_v8i16,
4026                           SSE_INTSHIFT_ITINS_P>;
4027defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
4028                           VR128, v4i32, v4i32, bc_v4i32,
4029                           SSE_INTSHIFT_ITINS_P>;
4030
4031let ExeDomain = SSEPackedInt in {
4032  // 128-bit logical shifts.
4033  def PSLLDQri : PDIi8<0x73, MRM7r,
4034                       (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
4035                       "pslldq\t{$src2, $dst|$dst, $src2}",
4036                       [(set VR128:$dst,
4037                         (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>;
4038  def PSRLDQri : PDIi8<0x73, MRM3r,
4039                       (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
4040                       "psrldq\t{$src2, $dst|$dst, $src2}",
4041                       [(set VR128:$dst,
4042                         (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>;
4043  // PSRADQri doesn't exist in SSE[1-3].
4044}
4045} // Constraints = "$src1 = $dst"
4046
4047let Predicates = [HasAVX] in {
4048  def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
4049            (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
4050  def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
4051            (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
4052  def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
4053            (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
4054
4055  // Shift up / down and insert zeros.
4056  def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
4057            (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
4058  def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
4059            (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
4060}
4061
4062let Predicates = [HasAVX2] in {
4063  def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2),
4064            (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
4065  def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
4066            (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
4067}
4068
4069let Predicates = [HasSSE2] in {
4070  def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
4071            (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
4072  def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
4073            (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
4074  def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
4075            (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
4076
4077  // Shift up / down and insert zeros.
4078  def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
4079            (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
4080  def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
4081            (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
4082}
4083
4084//===---------------------------------------------------------------------===//
4085// SSE2 - Packed Integer Comparison Instructions
4086//===---------------------------------------------------------------------===//
4087
4088let Predicates = [HasAVX] in {
4089  defm VPCMPEQB  : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v16i8,
4090                                VR128, memopv2i64, i128mem,
4091                                SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
4092  defm VPCMPEQW  : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v8i16,
4093                                VR128, memopv2i64, i128mem,
4094                                SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
4095  defm VPCMPEQD  : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v4i32,
4096                                VR128, memopv2i64, i128mem,
4097                                SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
4098  defm VPCMPGTB  : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v16i8,
4099                                VR128, memopv2i64, i128mem,
4100                                SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
4101  defm VPCMPGTW  : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v8i16,
4102                                VR128, memopv2i64, i128mem,
4103                                SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
4104  defm VPCMPGTD  : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v4i32,
4105                                VR128, memopv2i64, i128mem,
4106                                SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
4107}
4108
4109let Predicates = [HasAVX2] in {
4110  defm VPCMPEQBY : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v32i8,
4111                                VR256, memopv4i64, i256mem,
4112                                SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
4113  defm VPCMPEQWY : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v16i16,
4114                                VR256, memopv4i64, i256mem,
4115                                SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
4116  defm VPCMPEQDY : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v8i32,
4117                                VR256, memopv4i64, i256mem,
4118                                SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
4119  defm VPCMPGTBY : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v32i8,
4120                                VR256, memopv4i64, i256mem,
4121                                SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
4122  defm VPCMPGTWY : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v16i16,
4123                                VR256, memopv4i64, i256mem,
4124                                SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
4125  defm VPCMPGTDY : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v8i32,
4126                                VR256, memopv4i64, i256mem,
4127                                SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
4128}
4129
4130let Constraints = "$src1 = $dst" in {
4131  defm PCMPEQB  : PDI_binop_rm<0x74, "pcmpeqb", X86pcmpeq, v16i8,
4132                               VR128, memopv2i64, i128mem,
4133                               SSE_INTALU_ITINS_P, 1>;
4134  defm PCMPEQW  : PDI_binop_rm<0x75, "pcmpeqw", X86pcmpeq, v8i16,
4135                               VR128, memopv2i64, i128mem,
4136                               SSE_INTALU_ITINS_P, 1>;
4137  defm PCMPEQD  : PDI_binop_rm<0x76, "pcmpeqd", X86pcmpeq, v4i32,
4138                               VR128, memopv2i64, i128mem,
4139                               SSE_INTALU_ITINS_P, 1>;
4140  defm PCMPGTB  : PDI_binop_rm<0x64, "pcmpgtb", X86pcmpgt, v16i8,
4141                               VR128, memopv2i64, i128mem,
4142                               SSE_INTALU_ITINS_P>;
4143  defm PCMPGTW  : PDI_binop_rm<0x65, "pcmpgtw", X86pcmpgt, v8i16,
4144                               VR128, memopv2i64, i128mem,
4145                               SSE_INTALU_ITINS_P>;
4146  defm PCMPGTD  : PDI_binop_rm<0x66, "pcmpgtd", X86pcmpgt, v4i32,
4147                               VR128, memopv2i64, i128mem,
4148                               SSE_INTALU_ITINS_P>;
4149} // Constraints = "$src1 = $dst"
4150
4151//===---------------------------------------------------------------------===//
4152// SSE2 - Packed Integer Pack Instructions
4153//===---------------------------------------------------------------------===//
4154
4155let Predicates = [HasAVX] in {
4156defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
4157                                  VR128, memopv2i64, i128mem,
4158                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
4159defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
4160                                  VR128, memopv2i64, i128mem,
4161                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
4162defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128,
4163                                  VR128, memopv2i64, i128mem,
4164                                  SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
4165}
4166
4167let Predicates = [HasAVX2] in {
4168defm VPACKSSWBY : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_avx2_packsswb,
4169                                   VR256, memopv4i64, i256mem,
4170                                   SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
4171defm VPACKSSDWY : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_avx2_packssdw,
4172                                   VR256, memopv4i64, i256mem,
4173                                   SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
4174defm VPACKUSWBY : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_avx2_packuswb,
4175                                   VR256, memopv4i64, i256mem,
4176                                   SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
4177}
4178
4179let Constraints = "$src1 = $dst" in {
4180defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128,
4181                                 VR128, memopv2i64, i128mem,
4182                                 SSE_INTALU_ITINS_P>;
4183defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128,
4184                                 VR128, memopv2i64, i128mem,
4185                                 SSE_INTALU_ITINS_P>;
4186defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128,
4187                                 VR128, memopv2i64, i128mem,
4188                                 SSE_INTALU_ITINS_P>;
4189} // Constraints = "$src1 = $dst"
4190
4191//===---------------------------------------------------------------------===//
4192// SSE2 - Packed Integer Shuffle Instructions
4193//===---------------------------------------------------------------------===//
4194
4195let ExeDomain = SSEPackedInt in {
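/// sse2_pshuffle - PSHUFD/PSHUFHW/PSHUFLW-style shuffle of one source,
/// selected by an 8-bit immediate control.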
4196multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, SDNode OpNode> {
4197def ri : Ii8<0x70, MRMSrcReg,
4198             (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
4199             !strconcat(OpcodeStr,
4200                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4201              [(set VR128:$dst, (vt (OpNode VR128:$src1, (i8 imm:$src2))))],
4202              IIC_SSE_PSHUF>;
4203def mi : Ii8<0x70, MRMSrcMem,
4204             (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
4205             !strconcat(OpcodeStr,
4206                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4207              [(set VR128:$dst,
4208                (vt (OpNode (bitconvert (memopv2i64 addr:$src1)),
4209                             (i8 imm:$src2))))],
4210                             IIC_SSE_PSHUF>;
4211}
4212
4213multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, SDNode OpNode> {
4214def Yri : Ii8<0x70, MRMSrcReg,
4215              (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2),
4216              !strconcat(OpcodeStr,
4217                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4218              [(set VR256:$dst, (vt (OpNode VR256:$src1, (i8 imm:$src2))))]>;
4219def Ymi : Ii8<0x70, MRMSrcMem,
4220              (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2),
4221              !strconcat(OpcodeStr,
4222                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4223              [(set VR256:$dst,
4224                (vt (OpNode (bitconvert (memopv4i64 addr:$src1)),
4225                             (i8 imm:$src2))))]>;
4226}
4227} // ExeDomain = SSEPackedInt
4228
4229let Predicates = [HasAVX] in {
4230 let AddedComplexity = 5 in
4231  defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, X86PShufd>, TB, OpSize, VEX;
4232
4233 // SSE2 with ImmT == Imm8 and XS prefix.
4234  defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, X86PShufhw>, XS, VEX;
4235
4236 // SSE2 with ImmT == Imm8 and XD prefix.
4237  defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, X86PShuflw>, XD, VEX;
4238
4239 def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
4240           (VPSHUFDmi addr:$src1, imm:$imm)>;
4241 def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
4242           (VPSHUFDri VR128:$src1, imm:$imm)>;
4243}
4244
4245let Predicates = [HasAVX2] in {
4246  defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>, TB, OpSize, VEX;
4247  defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>, XS, VEX;
4248  defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, XD, VEX;
4249}
4250
4251let Predicates = [HasSSE2] in {
4252 let AddedComplexity = 5 in
4253  defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize;
4254
4255 // SSE2 with ImmT == Imm8 and XS prefix.
4256  defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, X86PShufhw>, XS;
4257
4258 // SSE2 with ImmT == Imm8 and XD prefix.
4259  defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, X86PShuflw>, XD;
4260
4261 def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
4262           (PSHUFDmi addr:$src1, imm:$imm)>;
4263 def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
4264           (PSHUFDri VR128:$src1, imm:$imm)>;
4265}
4266
4267//===---------------------------------------------------------------------===//
4268// SSE2 - Packed Integer Unpack Instructions
4269//===---------------------------------------------------------------------===//
4270
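// The unpack instructions interleave elements taken from the low (PUNPCKL*)
// or high (PUNPCKH*) halves of the two sources, alternating src1 and src2
// elements. The AVX2 256-bit forms repeat the same interleave independently
// within each 128-bit lane.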
4271let ExeDomain = SSEPackedInt in {
4272multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
4273                       SDNode OpNode, PatFrag bc_frag, bit Is2Addr = 1> {
4274  def rr : PDI<opc, MRMSrcReg,
4275      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
4276      !if(Is2Addr,
4277          !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
4278          !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4279      [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))],
4280      IIC_SSE_UNPCK>;
4281  def rm : PDI<opc, MRMSrcMem,
4282      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
4283      !if(Is2Addr,
4284          !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
4285          !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
4286      [(set VR128:$dst, (OpNode VR128:$src1,
4287                                  (bc_frag (memopv2i64
4288                                               addr:$src2))))],
4289                                               IIC_SSE_UNPCK>;
4290}
4291
4292multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
4293                         SDNode OpNode, PatFrag bc_frag> {
4294  def Yrr : PDI<opc, MRMSrcReg,
4295      (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
4296      !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4297      [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>;
4298  def Yrm : PDI<opc, MRMSrcMem,
4299      (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
4300      !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
4301      [(set VR256:$dst, (OpNode VR256:$src1,
4302                                  (bc_frag (memopv4i64 addr:$src2))))]>;
4303}
4304
4305let Predicates = [HasAVX] in {
4306  defm VPUNPCKLBW  : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
4307                                 bc_v16i8, 0>, VEX_4V;
4308  defm VPUNPCKLWD  : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
4309                                 bc_v8i16, 0>, VEX_4V;
4310  defm VPUNPCKLDQ  : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
4311                                 bc_v4i32, 0>, VEX_4V;
4312  defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
4313                                 bc_v2i64, 0>, VEX_4V;
4314
4315  defm VPUNPCKHBW  : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
4316                                 bc_v16i8, 0>, VEX_4V;
4317  defm VPUNPCKHWD  : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
4318                                 bc_v8i16, 0>, VEX_4V;
4319  defm VPUNPCKHDQ  : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
4320                                 bc_v4i32, 0>, VEX_4V;
4321  defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
4322                                 bc_v2i64, 0>, VEX_4V;
4323}
4324
4325let Predicates = [HasAVX2] in {
4326  defm VPUNPCKLBW  : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
4327                                   bc_v32i8>, VEX_4V;
4328  defm VPUNPCKLWD  : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
4329                                   bc_v16i16>, VEX_4V;
4330  defm VPUNPCKLDQ  : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
4331                                   bc_v8i32>, VEX_4V;
4332  defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
4333                                   bc_v4i64>, VEX_4V;
4334
4335  defm VPUNPCKHBW  : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
4336                                   bc_v32i8>, VEX_4V;
4337  defm VPUNPCKHWD  : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
4338                                   bc_v16i16>, VEX_4V;
4339  defm VPUNPCKHDQ  : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
4340                                   bc_v8i32>, VEX_4V;
4341  defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
4342                                   bc_v4i64>, VEX_4V;
4343}
4344
4345let Constraints = "$src1 = $dst" in {
4346  defm PUNPCKLBW  : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
4347                                bc_v16i8>;
4348  defm PUNPCKLWD  : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
4349                                bc_v8i16>;
4350  defm PUNPCKLDQ  : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
4351                                bc_v4i32>;
4352  defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
4353                                bc_v2i64>;
4354
4355  defm PUNPCKHBW  : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
4356                                bc_v16i8>;
4357  defm PUNPCKHWD  : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
4358                                bc_v8i16>;
4359  defm PUNPCKHDQ  : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
4360                                bc_v4i32>;
4361  defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
4362                                bc_v2i64>;
4363}
4364} // ExeDomain = SSEPackedInt
4365
4366// Patterns for using AVX1 instructions with integer vectors
4367// Here to give AVX2 priority
4368let Predicates = [HasAVX] in {
4369  def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
4370            (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
4371  def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
4372            (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
4373  def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
4374            (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
4375  def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
4376            (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
4377
4378  def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
4379            (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
4380  def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
4381            (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
4382  def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
4383            (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
4384  def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
4385            (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
4386}
4387
4388//===---------------------------------------------------------------------===//
4389// SSE2 - Packed Integer Extract and Insert
4390//===---------------------------------------------------------------------===//
4391
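// PEXTRW zero-extends the word selected by the immediate into a 32-bit GPR;
// PINSRW replaces the selected word of the destination with the low 16 bits
// of a GPR or with a 16-bit memory operand.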
4392let ExeDomain = SSEPackedInt in {
4393multiclass sse2_pinsrw<bit Is2Addr = 1> {
4394  def rri : Ii8<0xC4, MRMSrcReg,
4395       (outs VR128:$dst), (ins VR128:$src1,
4396        GR32:$src2, i32i8imm:$src3),
4397       !if(Is2Addr,
4398           "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
4399           "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4400       [(set VR128:$dst,
4401         (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>;
4402  def rmi : Ii8<0xC4, MRMSrcMem,
4403                       (outs VR128:$dst), (ins VR128:$src1,
4404                        i16mem:$src2, i32i8imm:$src3),
4405       !if(Is2Addr,
4406           "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
4407           "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
4408       [(set VR128:$dst,
4409         (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
4410                    imm:$src3))], IIC_SSE_PINSRW>;
4411}
4412
4413// Extract
4414let Predicates = [HasAVX] in
4415def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
4416                    (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
4417                    "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4418                    [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
4419                                                imm:$src2))]>, TB, OpSize, VEX;
4420def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
4421                    (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
4422                    "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4423                    [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
4424                                                imm:$src2))], IIC_SSE_PEXTRW>;
4425
4426// Insert
4427let Predicates = [HasAVX] in {
4428  defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V;
4429  def  VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
4430       (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
4431       "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
4432       []>, TB, OpSize, VEX_4V;
4433}
4434
4435let Constraints = "$src1 = $dst" in
4436  defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[HasSSE2]>;
4437
4438} // ExeDomain = SSEPackedInt
4439
4440//===---------------------------------------------------------------------===//
4441// SSE2 - Packed Mask Creation
4442//===---------------------------------------------------------------------===//
4443
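// PMOVMSKB gathers the most significant bit of each byte of the source into
// the low bits of a general-purpose register and zeroes the remaining bits
// (16 mask bits for an XMM source, 32 for the AVX2 YMM form).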
4444let ExeDomain = SSEPackedInt in {
4445
4446def VPMOVMSKBrr  : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
4447           "pmovmskb\t{$src, $dst|$dst, $src}",
4448           [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
4449           IIC_SSE_MOVMSK>, VEX;
4450def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
4451           "pmovmskb\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK>, VEX;
4452
4453let Predicates = [HasAVX2] in {
4454def VPMOVMSKBYrr  : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
4455           "pmovmskb\t{$src, $dst|$dst, $src}",
4456           [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX;
4457def VPMOVMSKBYr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
4458           "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
4459}
4460
4461def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
4462           "pmovmskb\t{$src, $dst|$dst, $src}",
4463           [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
4464           IIC_SSE_MOVMSK>;
4465
4466} // ExeDomain = SSEPackedInt
4467
4468//===---------------------------------------------------------------------===//
4469// SSE2 - Conditional Store
4470//===---------------------------------------------------------------------===//
4471
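// MASKMOVDQU stores only the bytes of $src whose corresponding byte in $mask
// has its most significant bit set. The destination address is implicit in
// EDI/RDI, hence the Uses lists on the definitions below.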
4472let ExeDomain = SSEPackedInt in {
4473
4474let Uses = [EDI] in
4475def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
4476           (ins VR128:$src, VR128:$mask),
4477           "maskmovdqu\t{$mask, $src|$src, $mask}",
4478           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
4479           IIC_SSE_MASKMOV>, VEX;
4480let Uses = [RDI] in
4481def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
4482           (ins VR128:$src, VR128:$mask),
4483           "maskmovdqu\t{$mask, $src|$src, $mask}",
4484           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
4485           IIC_SSE_MASKMOV>, VEX;
4486
4487let Uses = [EDI] in
4488def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
4489           "maskmovdqu\t{$mask, $src|$src, $mask}",
4490           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
4491           IIC_SSE_MASKMOV>;
4492let Uses = [RDI] in
4493def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
4494           "maskmovdqu\t{$mask, $src|$src, $mask}",
4495           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
4496           IIC_SSE_MASKMOV>;
4497
4498} // ExeDomain = SSEPackedInt
4499
4500//===---------------------------------------------------------------------===//
4501// SSE2 - Move Doubleword
4502//===---------------------------------------------------------------------===//
4503
4504//===---------------------------------------------------------------------===//
4505// Move Int Doubleword to Packed Double Int
4506//
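// These forms place a 32-bit GPR or a loaded i32 into element 0 of an XMM
// register (modeled with scalar_to_vector); the "mov{d|q}" variants do the
// same with a 64-bit GPR into the low quadword.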
4507def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
4508                      "movd\t{$src, $dst|$dst, $src}",
4509                      [(set VR128:$dst,
4510                        (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
4511                        VEX;
4512def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
4513                      "movd\t{$src, $dst|$dst, $src}",
4514                      [(set VR128:$dst,
4515                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
4516                        IIC_SSE_MOVDQ>,
4517                      VEX;
4518def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
4519                        "mov{d|q}\t{$src, $dst|$dst, $src}",
4520                        [(set VR128:$dst,
4521                          (v2i64 (scalar_to_vector GR64:$src)))],
4522                          IIC_SSE_MOVDQ>, VEX;
4523def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
4524                       "mov{d|q}\t{$src, $dst|$dst, $src}",
4525                       [(set FR64:$dst, (bitconvert GR64:$src))],
4526                       IIC_SSE_MOVDQ>, VEX;
4527
4528def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
4529                      "movd\t{$src, $dst|$dst, $src}",
4530                      [(set VR128:$dst,
4531                        (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>;
4532def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
4533                      "movd\t{$src, $dst|$dst, $src}",
4534                      [(set VR128:$dst,
4535                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
4536                        IIC_SSE_MOVDQ>;
4537def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
4538                        "mov{d|q}\t{$src, $dst|$dst, $src}",
4539                        [(set VR128:$dst,
4540                          (v2i64 (scalar_to_vector GR64:$src)))],
4541                          IIC_SSE_MOVDQ>;
4542def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
4543                       "mov{d|q}\t{$src, $dst|$dst, $src}",
4544                       [(set FR64:$dst, (bitconvert GR64:$src))],
4545                       IIC_SSE_MOVDQ>;
4546
4547//===---------------------------------------------------------------------===//
4548// Move Int Doubleword to Single Scalar
4549//
4550def VMOVDI2SSrr  : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
4551                      "movd\t{$src, $dst|$dst, $src}",
4552                      [(set FR32:$dst, (bitconvert GR32:$src))],
4553                      IIC_SSE_MOVDQ>, VEX;
4554
4555def VMOVDI2SSrm  : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
4556                      "movd\t{$src, $dst|$dst, $src}",
4557                      [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
4558                      IIC_SSE_MOVDQ>,
4559                      VEX;
4560def MOVDI2SSrr  : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
4561                      "movd\t{$src, $dst|$dst, $src}",
4562                      [(set FR32:$dst, (bitconvert GR32:$src))],
4563                      IIC_SSE_MOVDQ>;
4564
4565def MOVDI2SSrm  : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
4566                      "movd\t{$src, $dst|$dst, $src}",
4567                      [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
4568                      IIC_SSE_MOVDQ>;
4569
4570//===---------------------------------------------------------------------===//
4571// Move Packed Doubleword Int to Packed Double Int
4572//
4573def VMOVPDI2DIrr  : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
4574                       "movd\t{$src, $dst|$dst, $src}",
4575                       [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
4576                                        (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX;
4577def VMOVPDI2DImr  : VPDI<0x7E, MRMDestMem, (outs),
4578                       (ins i32mem:$dst, VR128:$src),
4579                       "movd\t{$src, $dst|$dst, $src}",
4580                       [(store (i32 (vector_extract (v4i32 VR128:$src),
4581                                     (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
4582                                     VEX;
4583def MOVPDI2DIrr  : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
4584                       "movd\t{$src, $dst|$dst, $src}",
4585                       [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
4586                                        (iPTR 0)))], IIC_SSE_MOVD_ToGP>;
4587def MOVPDI2DImr  : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
4588                       "movd\t{$src, $dst|$dst, $src}",
4589                       [(store (i32 (vector_extract (v4i32 VR128:$src),
4590                                     (iPTR 0))), addr:$dst)],
4591                                     IIC_SSE_MOVDQ>;
4592
4593//===---------------------------------------------------------------------===//
4594// Move Packed Doubleword Int first element to Doubleword Int
4595//
4596def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
4597                          "mov{d|q}\t{$src, $dst|$dst, $src}",
4598                          [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
4599                                                           (iPTR 0)))],
4600                                                           IIC_SSE_MOVD_ToGP>,
4601                      TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>;
4602
4603def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
4604                        "mov{d|q}\t{$src, $dst|$dst, $src}",
4605                        [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
4606                                                         (iPTR 0)))],
4607                                                         IIC_SSE_MOVD_ToGP>;
4608
4609//===---------------------------------------------------------------------===//
4610// Bitcast FR64 <-> GR64
4611//
4612let Predicates = [HasAVX] in
4613def VMOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
4614                        "vmovq\t{$src, $dst|$dst, $src}",
4615                        [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
4616                        VEX;
4617def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
4618                         "mov{d|q}\t{$src, $dst|$dst, $src}",
4619                         [(set GR64:$dst, (bitconvert FR64:$src))],
4620                         IIC_SSE_MOVDQ>, VEX;
4621def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
4622                         "movq\t{$src, $dst|$dst, $src}",
4623                         [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
4624                         IIC_SSE_MOVDQ>, VEX;
4625
4626def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
4627                       "movq\t{$src, $dst|$dst, $src}",
4628                       [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
4629                       IIC_SSE_MOVDQ>;
4630def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
4631                       "mov{d|q}\t{$src, $dst|$dst, $src}",
4632                       [(set GR64:$dst, (bitconvert FR64:$src))],
4633                       IIC_SSE_MOVD_ToGP>;
4634def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
4635                       "movq\t{$src, $dst|$dst, $src}",
4636                       [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
4637                       IIC_SSE_MOVDQ>;
4638
4639//===---------------------------------------------------------------------===//
4640// Move Scalar Single to Double Int
4641//
4642def VMOVSS2DIrr  : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
4643                      "movd\t{$src, $dst|$dst, $src}",
4644                      [(set GR32:$dst, (bitconvert FR32:$src))],
4645                      IIC_SSE_MOVD_ToGP>, VEX;
4646def VMOVSS2DImr  : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
4647                      "movd\t{$src, $dst|$dst, $src}",
4648                      [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
4649                      IIC_SSE_MOVDQ>, VEX;
4650def MOVSS2DIrr  : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
4651                      "movd\t{$src, $dst|$dst, $src}",
4652                      [(set GR32:$dst, (bitconvert FR32:$src))],
4653                      IIC_SSE_MOVD_ToGP>;
4654def MOVSS2DImr  : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
4655                      "movd\t{$src, $dst|$dst, $src}",
4656                      [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
4657                      IIC_SSE_MOVDQ>;
4658
4659//===---------------------------------------------------------------------===//
4660// Patterns and instructions to describe movd/movq to XMM register zero-extends
4661//
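// X86vzmovl models a move of the low element with all remaining elements
// cleared to zero, which matches what movd/movq to an XMM register does.
// The raised AddedComplexity gives these zero-extending patterns priority
// during instruction selection.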
4662let AddedComplexity = 15 in {
4663def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
4664                       "movd\t{$src, $dst|$dst, $src}",
4665                       [(set VR128:$dst, (v4i32 (X86vzmovl
4666                                      (v4i32 (scalar_to_vector GR32:$src)))))],
4667                                      IIC_SSE_MOVDQ>, VEX;
4668def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
4669                       "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
4670                       [(set VR128:$dst, (v2i64 (X86vzmovl
4671                                      (v2i64 (scalar_to_vector GR64:$src)))))],
4672                                      IIC_SSE_MOVDQ>,
4673                                      VEX, VEX_W;
4674}
4675let AddedComplexity = 15 in {
4676def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
4677                       "movd\t{$src, $dst|$dst, $src}",
4678                       [(set VR128:$dst, (v4i32 (X86vzmovl
4679                                      (v4i32 (scalar_to_vector GR32:$src)))))],
4680                                      IIC_SSE_MOVDQ>;
4681def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
4682                       "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
4683                       [(set VR128:$dst, (v2i64 (X86vzmovl
4684                                      (v2i64 (scalar_to_vector GR64:$src)))))],
4685                                      IIC_SSE_MOVDQ>;
4686}
4687
4688let AddedComplexity = 20 in {
4689def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
4690                       "movd\t{$src, $dst|$dst, $src}",
4691                       [(set VR128:$dst,
4692                         (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
4693                                                   (loadi32 addr:$src))))))],
4694                                                   IIC_SSE_MOVDQ>, VEX;
4695def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
4696                       "movd\t{$src, $dst|$dst, $src}",
4697                       [(set VR128:$dst,
4698                         (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
4699                                                   (loadi32 addr:$src))))))],
4700                                                   IIC_SSE_MOVDQ>;
4701}
4702
4703let Predicates = [HasAVX] in {
4704  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4705  let AddedComplexity = 20 in {
4706    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
4707              (VMOVZDI2PDIrm addr:$src)>;
4708    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
4709              (VMOVZDI2PDIrm addr:$src)>;
4710  }
4711  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
4712  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4713                                (v4i32 (scalar_to_vector GR32:$src)),(i32 0)))),
4714            (SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>;
4715  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4716                                (v2i64 (scalar_to_vector GR64:$src)),(i32 0)))),
4717            (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
4718}
4719
4720let Predicates = [HasSSE2], AddedComplexity = 20 in {
4721  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
4722            (MOVZDI2PDIrm addr:$src)>;
4723  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
4724            (MOVZDI2PDIrm addr:$src)>;
4725}
4726
4727// These are the correct encodings of the instructions so that we know how to
4728// read correct assembly, even though we continue to emit the wrong ones for
4729// compatibility with Darwin's buggy assembler.
4730def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
4731                (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
4732def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
4733                (MOV64toSDrr FR64:$dst, GR64:$src), 0>;
4734def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
4735                (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
4736def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
4737                (MOVSDto64rr GR64:$dst, FR64:$src), 0>;
4738def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
4739                (VMOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
4740def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
4741                (MOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
4742
4743//===---------------------------------------------------------------------===//
4744// SSE2 - Move Quadword
4745//===---------------------------------------------------------------------===//
4746
4747//===---------------------------------------------------------------------===//
4748// Move Quadword Int to Packed Quadword Int
4749//
4750def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
4751                    "vmovq\t{$src, $dst|$dst, $src}",
4752                    [(set VR128:$dst,
4753                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
4754                    VEX, Requires<[HasAVX]>;
4755def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
4756                    "movq\t{$src, $dst|$dst, $src}",
4757                    [(set VR128:$dst,
4758                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))],
4759                      IIC_SSE_MOVDQ>, XS,
4760                    Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
4761
4762//===---------------------------------------------------------------------===//
4763// Move Packed Quadword Int to Quadword Int
4764//
4765def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
4766                      "movq\t{$src, $dst|$dst, $src}",
4767                      [(store (i64 (vector_extract (v2i64 VR128:$src),
4768                                    (iPTR 0))), addr:$dst)],
4769                                    IIC_SSE_MOVDQ>, VEX;
4770def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
4771                      "movq\t{$src, $dst|$dst, $src}",
4772                      [(store (i64 (vector_extract (v2i64 VR128:$src),
4773                                    (iPTR 0))), addr:$dst)],
4774                                    IIC_SSE_MOVDQ>;
4775
4776//===---------------------------------------------------------------------===//
4777// Store / copy lower 64-bits of a XMM register.
4778//
4779def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
4780                     "movq\t{$src, $dst|$dst, $src}",
4781                     [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
4782def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
4783                     "movq\t{$src, $dst|$dst, $src}",
4784                     [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)],
4785                     IIC_SSE_MOVDQ>;
4786
4787let AddedComplexity = 20 in
4788def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
4789                     "vmovq\t{$src, $dst|$dst, $src}",
4790                     [(set VR128:$dst,
4791                       (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
4792                                                 (loadi64 addr:$src))))))],
4793                                                 IIC_SSE_MOVDQ>,
4794                     XS, VEX, Requires<[HasAVX]>;
4795
4796let AddedComplexity = 20 in
4797def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
4798                     "movq\t{$src, $dst|$dst, $src}",
4799                     [(set VR128:$dst,
4800                       (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
4801                                                 (loadi64 addr:$src))))))],
4802                                                 IIC_SSE_MOVDQ>,
4803                     XS, Requires<[HasSSE2]>;
4804
4805let Predicates = [HasAVX], AddedComplexity = 20 in {
4806  def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
4807            (VMOVZQI2PQIrm addr:$src)>;
4808  def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
4809            (VMOVZQI2PQIrm addr:$src)>;
4810  def : Pat<(v2i64 (X86vzload addr:$src)),
4811            (VMOVZQI2PQIrm addr:$src)>;
4812}
4813
4814let Predicates = [HasSSE2], AddedComplexity = 20 in {
4815  def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
4816            (MOVZQI2PQIrm addr:$src)>;
4817  def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
4818            (MOVZQI2PQIrm addr:$src)>;
4819  def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
4820}
4821
4822let Predicates = [HasAVX] in {
4823def : Pat<(v4i64 (alignedX86vzload addr:$src)),
4824          (SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>;
4825def : Pat<(v4i64 (X86vzload addr:$src)),
4826          (SUBREG_TO_REG (i32 0), (VMOVUPSrm addr:$src), sub_xmm)>;
4827}
4828
4829//===---------------------------------------------------------------------===//
4830// Move from XMM to XMM and clear the upper 64 bits. Note: there is a bug in
4831// the IA-32 documentation; movq xmm1, xmm2 does clear the high bits.
4832//
4833let AddedComplexity = 15 in
4834def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4835                        "vmovq\t{$src, $dst|$dst, $src}",
4836                    [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
4837                    IIC_SSE_MOVQ_RR>,
4838                      XS, VEX, Requires<[HasAVX]>;
4839let AddedComplexity = 15 in
4840def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4841                        "movq\t{$src, $dst|$dst, $src}",
4842                    [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
4843                    IIC_SSE_MOVQ_RR>,
4844                      XS, Requires<[HasSSE2]>;
4845
4846let AddedComplexity = 20 in
4847def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
4848                        "vmovq\t{$src, $dst|$dst, $src}",
4849                    [(set VR128:$dst, (v2i64 (X86vzmovl
4850                                             (loadv2i64 addr:$src))))],
4851                                             IIC_SSE_MOVDQ>,
4852                      XS, VEX, Requires<[HasAVX]>;
4853let AddedComplexity = 20 in {
4854def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
4855                        "movq\t{$src, $dst|$dst, $src}",
4856                    [(set VR128:$dst, (v2i64 (X86vzmovl
4857                                             (loadv2i64 addr:$src))))],
4858                                             IIC_SSE_MOVDQ>,
4859                      XS, Requires<[HasSSE2]>;
4860}
4861
4862let AddedComplexity = 20 in {
4863  let Predicates = [HasAVX] in {
4864    def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
4865              (VMOVZPQILo2PQIrm addr:$src)>;
4866    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
4867              (VMOVZPQILo2PQIrr VR128:$src)>;
4868  }
4869  let Predicates = [HasSSE2] in {
4870    def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
4871              (MOVZPQILo2PQIrm addr:$src)>;
4872    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
4873              (MOVZPQILo2PQIrr VR128:$src)>;
4874  }
4875}
4876
4877// Instructions to match in the assembler
4878def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
4879                      "movq\t{$src, $dst|$dst, $src}", [],
4880                      IIC_SSE_MOVDQ>, VEX, VEX_W;
4881def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
4882                      "movq\t{$src, $dst|$dst, $src}", [],
4883                      IIC_SSE_MOVDQ>, VEX, VEX_W;
4884// Recognize "movd" with GR64 destination, but encode as a "movq"
4885def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
4886                          "movd\t{$src, $dst|$dst, $src}", [],
4887                          IIC_SSE_MOVDQ>, VEX, VEX_W;
4888
4889// Instructions for the disassembler
4890// xr = XMM register
4891// xm = mem64
4892
4893let Predicates = [HasAVX] in
4894def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4895                 "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
4896def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4897                 "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS;
4898
4899//===---------------------------------------------------------------------===//
4900// SSE3 - Conversion Instructions
4901//===---------------------------------------------------------------------===//
4902
4903// Convert Packed Double FP to Packed DW Integers
4904let Predicates = [HasAVX] in {
4905// The assembler can recognize rr 256-bit instructions by seeing a ymm
4906// register, but it cannot tell the operand size apart for memory operands.
4907// Provide explicitly sized rr and rm assembly forms to address this.
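// For example, "vcvtpd2dq (%rax), %xmm0" is ambiguous between a 128-bit and a
// 256-bit memory source, so the "vcvtpd2dqx"/"vcvtpd2dqy" mnemonics below
// spell out the operand size.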
4908def VCVTPD2DQrr  : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4909                       "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
4910def VCVTPD2DQXrYr  : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
4911                       "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
4912
4913// XMM only
4914def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4915                      "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
4916def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
4917                      "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
4918
4919// YMM only
4920def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
4921                      "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
4922def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
4923                      "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
4924}
4925
4926def CVTPD2DQrm  : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
4927                       "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
4928                       IIC_SSE_CVT_PD_RM>;
4929def CVTPD2DQrr  : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4930                       "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
4931                       IIC_SSE_CVT_PD_RR>;
4932
4933def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
4934          (VCVTTPD2DQYrr VR256:$src)>;
4935def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
4936          (VCVTTPD2DQYrm addr:$src)>;
4937
4938// Convert Packed DW Integers to Packed Double FP
4939let Predicates = [HasAVX] in {
4940def VCVTDQ2PDrm  : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
4941                     "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
4942def VCVTDQ2PDrr  : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4943                     "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
4944def VCVTDQ2PDYrm  : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
4945                     "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
4946def VCVTDQ2PDYrr  : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
4947                     "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
4948}
4949
4950def CVTDQ2PDrm  : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
4951                       "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
4952                       IIC_SSE_CVT_PD_RM>;
4953def CVTDQ2PDrr  : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
4954                       "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
4955                       IIC_SSE_CVT_PD_RR>;
4956
4957// AVX 256-bit register conversion intrinsics
4958def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
4959           (VCVTDQ2PDYrr VR128:$src)>;
4960def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))),
4961           (VCVTDQ2PDYrm addr:$src)>;
4962
4963def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
4964          (VCVTPD2DQYrr VR256:$src)>;
4965def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
4966          (VCVTPD2DQYrm addr:$src)>;
4967
4968def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
4969          (VCVTDQ2PDYrr VR128:$src)>;
4970def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
4971          (VCVTDQ2PDYrm addr:$src)>;
4972
4973//===---------------------------------------------------------------------===//
4974// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
4975//===---------------------------------------------------------------------===//
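// MOVSHDUP duplicates the odd-indexed single-precision elements of the source
// (element 1 into 0,1 and element 3 into 2,3); MOVSLDUP duplicates the
// even-indexed elements (0 into 0,1 and 2 into 2,3).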
4976multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
4977                              ValueType vt, RegisterClass RC, PatFrag mem_frag,
4978                              X86MemOperand x86memop> {
4979def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
4980                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4981                      [(set RC:$dst, (vt (OpNode RC:$src)))],
4982                      IIC_SSE_MOV_LH>;
4983def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
4984                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4985                      [(set RC:$dst, (OpNode (mem_frag addr:$src)))],
4986                      IIC_SSE_MOV_LH>;
4987}
4988
4989let Predicates = [HasAVX] in {
4990  defm VMOVSHDUP  : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
4991                                       v4f32, VR128, memopv4f32, f128mem>, VEX;
4992  defm VMOVSLDUP  : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
4993                                       v4f32, VR128, memopv4f32, f128mem>, VEX;
4994  defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
4995                                       v8f32, VR256, memopv8f32, f256mem>, VEX;
4996  defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
4997                                       v8f32, VR256, memopv8f32, f256mem>, VEX;
4998}
4999defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
5000                                   memopv4f32, f128mem>;
5001defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
5002                                   memopv4f32, f128mem>;
5003
5004let Predicates = [HasAVX] in {
5005  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
5006            (VMOVSHDUPrr VR128:$src)>;
5007  def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
5008            (VMOVSHDUPrm addr:$src)>;
5009  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
5010            (VMOVSLDUPrr VR128:$src)>;
5011  def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
5012            (VMOVSLDUPrm addr:$src)>;
5013  def : Pat<(v8i32 (X86Movshdup VR256:$src)),
5014            (VMOVSHDUPYrr VR256:$src)>;
5015  def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (memopv4i64 addr:$src)))),
5016            (VMOVSHDUPYrm addr:$src)>;
5017  def : Pat<(v8i32 (X86Movsldup VR256:$src)),
5018            (VMOVSLDUPYrr VR256:$src)>;
5019  def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (memopv4i64 addr:$src)))),
5020            (VMOVSLDUPYrm addr:$src)>;
5021}
5022
5023let Predicates = [HasSSE3] in {
5024  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
5025            (MOVSHDUPrr VR128:$src)>;
5026  def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
5027            (MOVSHDUPrm addr:$src)>;
5028  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
5029            (MOVSLDUPrr VR128:$src)>;
5030  def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
5031            (MOVSLDUPrm addr:$src)>;
5032}
5033
5034//===---------------------------------------------------------------------===//
5035// SSE3 - Replicate Double FP - MOVDDUP
5036//===---------------------------------------------------------------------===//
5037
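// MOVDDUP broadcasts the low double-precision element into both halves of the
// destination; the memory form loads only 64 bits. The 256-bit AVX form
// duplicates the low element of each 128-bit lane.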
5038multiclass sse3_replicate_dfp<string OpcodeStr> {
5039let neverHasSideEffects = 1 in
5040def rr  : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
5041                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5042                    [], IIC_SSE_MOV_LH>;
5043def rm  : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
5044                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5045                    [(set VR128:$dst,
5046                      (v2f64 (X86Movddup
5047                              (scalar_to_vector (loadf64 addr:$src)))))],
5048                              IIC_SSE_MOV_LH>;
5049}
5050
5051// FIXME: Merge with the class above once there are patterns for the ymm version.
5052multiclass sse3_replicate_dfp_y<string OpcodeStr> {
5053def rr  : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
5054                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5055                    [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>;
5056def rm  : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
5057                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5058                    [(set VR256:$dst,
5059                      (v4f64 (X86Movddup
5060                              (scalar_to_vector (loadf64 addr:$src)))))]>;
5061}
5062
5063let Predicates = [HasAVX] in {
5064  defm VMOVDDUP  : sse3_replicate_dfp<"vmovddup">, VEX;
5065  defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
5066}
5067
5068defm MOVDDUP : sse3_replicate_dfp<"movddup">;
5069
5070let Predicates = [HasAVX] in {
5071  def : Pat<(X86Movddup (memopv2f64 addr:$src)),
5072            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
5073  def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
5074            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
5075  def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
5076            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
5077  def : Pat<(X86Movddup (bc_v2f64
5078                             (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
5079            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
5080
5081  // 256-bit version
5082  def : Pat<(X86Movddup (memopv4f64 addr:$src)),
5083            (VMOVDDUPYrm addr:$src)>;
5084  def : Pat<(X86Movddup (memopv4i64 addr:$src)),
5085            (VMOVDDUPYrm addr:$src)>;
5086  def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
5087            (VMOVDDUPYrm addr:$src)>;
5088  def : Pat<(X86Movddup (v4i64 VR256:$src)),
5089            (VMOVDDUPYrr VR256:$src)>;
5090}
5091
5092let Predicates = [HasSSE3] in {
5093  def : Pat<(X86Movddup (memopv2f64 addr:$src)),
5094            (MOVDDUPrm addr:$src)>;
5095  def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
5096            (MOVDDUPrm addr:$src)>;
5097  def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
5098            (MOVDDUPrm addr:$src)>;
5099  def : Pat<(X86Movddup (bc_v2f64
5100                             (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
5101            (MOVDDUPrm addr:$src)>;
5102}
5103
5104//===---------------------------------------------------------------------===//
5105// SSE3 - Move Unaligned Integer
5106//===---------------------------------------------------------------------===//
5107
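// LDDQU loads 16 unaligned bytes like MOVDQU, but may fetch a wider aligned
// block and extract the requested bytes, which can be faster when the load
// crosses a cache-line boundary.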
5108let Predicates = [HasAVX] in {
5109  def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
5110                   "vlddqu\t{$src, $dst|$dst, $src}",
5111                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
5112  def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
5113                   "vlddqu\t{$src, $dst|$dst, $src}",
5114                   [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, VEX;
5115}
5116def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
5117                   "lddqu\t{$src, $dst|$dst, $src}",
5118                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))],
5119                   IIC_SSE_LDDQU>;
5120
5121//===---------------------------------------------------------------------===//
5122// SSE3 - Arithmetic
5123//===---------------------------------------------------------------------===//
5124
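// ADDSUBPS/ADDSUBPD subtract in the even-numbered element positions and add
// in the odd-numbered ones, e.g. for addsubps:
//   dst[0] = a[0] - b[0], dst[1] = a[1] + b[1],
//   dst[2] = a[2] - b[2], dst[3] = a[3] + b[3].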
5125multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
5126                       X86MemOperand x86memop, OpndItins itins,
5127                       bit Is2Addr = 1> {
5128  def rr : I<0xD0, MRMSrcReg,
5129       (outs RC:$dst), (ins RC:$src1, RC:$src2),
5130       !if(Is2Addr,
5131           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
5132           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
5133       [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>;
5134  def rm : I<0xD0, MRMSrcMem,
5135       (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
5136       !if(Is2Addr,
5137           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
5138           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
5139       [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rm>;
5140}
5141
5142let Predicates = [HasAVX] in {
5143  let ExeDomain = SSEPackedSingle in {
5144    defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
5145                                 f128mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V;
5146    defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
5147                                 f256mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V;
5148  }
5149  let ExeDomain = SSEPackedDouble in {
5150    defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
5151                                 f128mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V;
5152    defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
5153                                 f256mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V;
5154  }
5155}
5156let Constraints = "$src1 = $dst", Predicates = [HasSSE3] in {
5157  let ExeDomain = SSEPackedSingle in
5158  defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
5159                              f128mem, SSE_ALU_F32P>, TB, XD;
5160  let ExeDomain = SSEPackedDouble in
5161  defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
5162                              f128mem, SSE_ALU_F64P>, TB, OpSize;
5163}
5164
5165//===---------------------------------------------------------------------===//
5166// SSE3 Instructions
5167//===---------------------------------------------------------------------===//
5168
5169// Horizontal ops
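// Each horizontal op combines adjacent element pairs; for haddps,
// dst = { a[0]+a[1], a[2]+a[3], b[0]+b[1], b[2]+b[3] }.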
5170multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
5171                   X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
5172  def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
5173       !if(Is2Addr,
5174         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
5175         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
5176      [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
5177
5178  def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
5179       !if(Is2Addr,
5180         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
5181         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
5182      [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
5183        IIC_SSE_HADDSUB_RM>;
5184}
5185multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
5186                  X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
5187  def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
5188       !if(Is2Addr,
5189         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
5190         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
5191      [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
5192
5193  def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
5194       !if(Is2Addr,
5195         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
5196         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
5197      [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
5198        IIC_SSE_HADDSUB_RM>;
5199}
5200
5201let Predicates = [HasAVX] in {
5202  let ExeDomain = SSEPackedSingle in {
5203    defm VHADDPS  : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
5204                            X86fhadd, 0>, VEX_4V;
5205    defm VHSUBPS  : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
5206                            X86fhsub, 0>, VEX_4V;
5207    defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
5208                            X86fhadd, 0>, VEX_4V;
5209    defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
5210                            X86fhsub, 0>, VEX_4V;
5211  }
5212  let ExeDomain = SSEPackedDouble in {
5213    defm VHADDPD  : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
5214                            X86fhadd, 0>, VEX_4V;
5215    defm VHSUBPD  : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
5216                            X86fhsub, 0>, VEX_4V;
5217    defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
5218                            X86fhadd, 0>, VEX_4V;
5219    defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
5220                            X86fhsub, 0>, VEX_4V;
5221  }
5222}
5223
5224let Constraints = "$src1 = $dst" in {
5225  let ExeDomain = SSEPackedSingle in {
5226    defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
5227    defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
5228  }
5229  let ExeDomain = SSEPackedDouble in {
5230    defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
5231    defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
5232  }
5233}
5234
5235//===---------------------------------------------------------------------===//
5236// SSSE3 - Packed Absolute Instructions
5237//===---------------------------------------------------------------------===//
5238
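// PABSB/PABSW/PABSD compute the per-element absolute value of a vector of
// signed bytes, words, or dwords.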
5239
5240/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
5241multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
5242                            Intrinsic IntId128> {
5243  def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
5244                    (ins VR128:$src),
5245                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5246                    [(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>,
5247                    OpSize;
5248
5249  def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
5250                    (ins i128mem:$src),
5251                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5252                    [(set VR128:$dst,
5253                      (IntId128
5254                       (bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>,
5255                    OpSize;
5256}
5257
5258/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
5259multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
5260                              Intrinsic IntId256> {
5261  def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
5262                    (ins VR256:$src),
5263                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5264                    [(set VR256:$dst, (IntId256 VR256:$src))]>,
5265                    OpSize;
5266
5267  def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
5268                    (ins i256mem:$src),
5269                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5270                    [(set VR256:$dst,
5271                      (IntId256
5272                       (bitconvert (memopv4i64 addr:$src))))]>, OpSize;
5273}
5274
5275let Predicates = [HasAVX] in {
5276  defm VPABSB  : SS3I_unop_rm_int<0x1C, "vpabsb",
5277                                  int_x86_ssse3_pabs_b_128>, VEX;
5278  defm VPABSW  : SS3I_unop_rm_int<0x1D, "vpabsw",
5279                                  int_x86_ssse3_pabs_w_128>, VEX;
5280  defm VPABSD  : SS3I_unop_rm_int<0x1E, "vpabsd",
5281                                  int_x86_ssse3_pabs_d_128>, VEX;
5282}
5283
5284let Predicates = [HasAVX2] in {
5285  defm VPABSB  : SS3I_unop_rm_int_y<0x1C, "vpabsb",
5286                                    int_x86_avx2_pabs_b>, VEX;
5287  defm VPABSW  : SS3I_unop_rm_int_y<0x1D, "vpabsw",
5288                                    int_x86_avx2_pabs_w>, VEX;
5289  defm VPABSD  : SS3I_unop_rm_int_y<0x1E, "vpabsd",
5290                                    int_x86_avx2_pabs_d>, VEX;
5291}
5292
5293defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
5294                              int_x86_ssse3_pabs_b_128>;
5295defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw",
5296                              int_x86_ssse3_pabs_w_128>;
5297defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
5298                              int_x86_ssse3_pabs_d_128>;
5299
5300//===---------------------------------------------------------------------===//
5301// SSSE3 - Packed Binary Operator Instructions
5302//===---------------------------------------------------------------------===//
5303
5304def SSE_PHADDSUBD : OpndItins<
5305  IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM
5306>;
5307def SSE_PHADDSUBSW : OpndItins<
5308  IIC_SSE_PHADDSUBSW_RR, IIC_SSE_PHADDSUBSW_RM
5309>;
5310def SSE_PHADDSUBW : OpndItins<
5311  IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM
5312>;
5313def SSE_PSHUFB : OpndItins<
5314  IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM
5315>;
5316def SSE_PSIGN : OpndItins<
5317  IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM
5318>;
5319def SSE_PMULHRSW : OpndItins<
5320  IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW
5321>;
5322
5323/// SS3I_binop_rm - Simple SSSE3 bin op
5324multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5325                         ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
5326                         X86MemOperand x86memop, OpndItins itins,
5327                         bit Is2Addr = 1> {
5328  let isCommutable = 1 in
5329  def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
5330       (ins RC:$src1, RC:$src2),
5331       !if(Is2Addr,
5332         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
5333         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
5334       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
5335       OpSize;
5336  def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
5337       (ins RC:$src1, x86memop:$src2),
5338       !if(Is2Addr,
5339         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
5340         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
5341       [(set RC:$dst,
5342         (OpVT (OpNode RC:$src1,
5343          (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize;
5344}
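
// A sketch of what the Is2Addr !if above selects, taking phaddw as an example:
//
//   Is2Addr = 1 (legacy SSSE3): phaddw  %xmm1, %xmm0    ($dst is also $src1)
//   Is2Addr = 0 (VEX_4V AVX):   vphaddw %xmm2, %xmm1, %xmm0
//
// so the same multiclass serves both the "$src1 = $dst" defs below and the
// three-operand AVX/AVX2 defs.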
5345
5346/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
5347multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
5348                             Intrinsic IntId128, OpndItins itins,
5349                             bit Is2Addr = 1> {
5350  let isCommutable = 1 in
5351  def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
5352       (ins VR128:$src1, VR128:$src2),
5353       !if(Is2Addr,
5354         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
5355         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
5356       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
5357       OpSize;
5358  def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
5359       (ins VR128:$src1, i128mem:$src2),
5360       !if(Is2Addr,
5361         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
5362         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
5363       [(set VR128:$dst,
5364         (IntId128 VR128:$src1,
5365          (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
5366}
5367
5368multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
5369                               Intrinsic IntId256> {
5370  let isCommutable = 1 in
5371  def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
5372       (ins VR256:$src1, VR256:$src2),
5373       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5374       [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
5375       OpSize;
5376  def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
5377       (ins VR256:$src1, i256mem:$src2),
5378       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5379       [(set VR256:$dst,
5380         (IntId256 VR256:$src1,
5381          (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
5382}
5383
5384let ImmT = NoImm, Predicates = [HasAVX] in {
5385let isCommutable = 0 in {
5386  defm VPHADDW    : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128,
5387                                  memopv2i64, i128mem,
5388                                  SSE_PHADDSUBW, 0>, VEX_4V;
5389  defm VPHADDD    : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128,
5390                                  memopv2i64, i128mem,
5391                                  SSE_PHADDSUBD, 0>, VEX_4V;
5392  defm VPHSUBW    : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128,
5393                                  memopv2i64, i128mem,
5394                                  SSE_PHADDSUBW, 0>, VEX_4V;
5395  defm VPHSUBD    : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128,
5396                                  memopv2i64, i128mem,
5397                                  SSE_PHADDSUBD, 0>, VEX_4V;
5398  defm VPSIGNB    : SS3I_binop_rm<0x08, "vpsignb", X86psign, v16i8, VR128,
5399                                  memopv2i64, i128mem,
5400                                  SSE_PSIGN, 0>, VEX_4V;
5401  defm VPSIGNW    : SS3I_binop_rm<0x09, "vpsignw", X86psign, v8i16, VR128,
5402                                  memopv2i64, i128mem,
5403                                  SSE_PSIGN, 0>, VEX_4V;
5404  defm VPSIGND    : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v4i32, VR128,
5405                                  memopv2i64, i128mem,
5406                                  SSE_PSIGN, 0>, VEX_4V;
5407  defm VPSHUFB    : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128,
5408                                  memopv2i64, i128mem,
5409                                  SSE_PSHUFB, 0>, VEX_4V;
5410  defm VPHADDSW   : SS3I_binop_rm_int<0x03, "vphaddsw",
5411                                      int_x86_ssse3_phadd_sw_128,
5412                                      SSE_PHADDSUBSW, 0>, VEX_4V;
5413  defm VPHSUBSW   : SS3I_binop_rm_int<0x07, "vphsubsw",
5414                                      int_x86_ssse3_phsub_sw_128,
5415                                      SSE_PHADDSUBSW, 0>, VEX_4V;
5416  defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
5417                                      int_x86_ssse3_pmadd_ub_sw_128,
5418                                      SSE_PMADD, 0>, VEX_4V;
5419}
5420defm VPMULHRSW    : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
5421                                      int_x86_ssse3_pmul_hr_sw_128,
5422                                      SSE_PMULHRSW, 0>, VEX_4V;
5423}
5424
5425let ImmT = NoImm, Predicates = [HasAVX2] in {
5426let isCommutable = 0 in {
5427  defm VPHADDWY   : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
5428                                  memopv4i64, i256mem,
5429                                  SSE_PHADDSUBW, 0>, VEX_4V;
  defm VPHADDDY   : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
                                  memopv4i64, i256mem,
                                  SSE_PHADDSUBD, 0>, VEX_4V;
  defm VPHSUBWY   : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
                                  memopv4i64, i256mem,
                                  SSE_PHADDSUBW, 0>, VEX_4V;
  defm VPHSUBDY   : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
                                  memopv4i64, i256mem,
                                  SSE_PHADDSUBD, 0>, VEX_4V;
  defm VPSIGNBY   : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256,
                                  memopv4i64, i256mem,
                                  SSE_PSIGN, 0>, VEX_4V;
  defm VPSIGNWY   : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256,
                                  memopv4i64, i256mem,
                                  SSE_PSIGN, 0>, VEX_4V;
  defm VPSIGNDY   : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256,
                                  memopv4i64, i256mem,
                                  SSE_PSIGN, 0>, VEX_4V;
  defm VPSHUFBY   : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
                                  memopv4i64, i256mem,
                                  SSE_PSHUFB, 0>, VEX_4V;
5451  defm VPHADDSW   : SS3I_binop_rm_int_y<0x03, "vphaddsw",
5452                                        int_x86_avx2_phadd_sw>, VEX_4V;
5453  defm VPHSUBSW   : SS3I_binop_rm_int_y<0x07, "vphsubsw",
5454                                        int_x86_avx2_phsub_sw>, VEX_4V;
5455  defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
5456                                        int_x86_avx2_pmadd_ub_sw>, VEX_4V;
5457}
5458defm VPMULHRSW    : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
5459                                        int_x86_avx2_pmul_hr_sw>, VEX_4V;
5460}
5461
5462// None of these have i8 immediate fields.
5463let ImmT = NoImm, Constraints = "$src1 = $dst" in {
5464let isCommutable = 0 in {
5465  defm PHADDW    : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128,
5466                                 memopv2i64, i128mem, SSE_PHADDSUBW>;
5467  defm PHADDD    : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128,
5468                                 memopv2i64, i128mem, SSE_PHADDSUBD>;
5469  defm PHSUBW    : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128,
5470                                 memopv2i64, i128mem, SSE_PHADDSUBW>;
5471  defm PHSUBD    : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128,
5472                                 memopv2i64, i128mem, SSE_PHADDSUBD>;
5473  defm PSIGNB    : SS3I_binop_rm<0x08, "psignb", X86psign, v16i8, VR128,
5474                                 memopv2i64, i128mem, SSE_PSIGN>;
5475  defm PSIGNW    : SS3I_binop_rm<0x09, "psignw", X86psign, v8i16, VR128,
5476                                 memopv2i64, i128mem, SSE_PSIGN>;
5477  defm PSIGND    : SS3I_binop_rm<0x0A, "psignd", X86psign, v4i32, VR128,
5478                                 memopv2i64, i128mem, SSE_PSIGN>;
5479  defm PSHUFB    : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, VR128,
5480                                 memopv2i64, i128mem, SSE_PSHUFB>;
5481  defm PHADDSW   : SS3I_binop_rm_int<0x03, "phaddsw",
5482                                     int_x86_ssse3_phadd_sw_128,
5483                                     SSE_PHADDSUBSW>;
5484  defm PHSUBSW   : SS3I_binop_rm_int<0x07, "phsubsw",
5485                                     int_x86_ssse3_phsub_sw_128,
5486                                     SSE_PHADDSUBSW>;
5487  defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
5488                                     int_x86_ssse3_pmadd_ub_sw_128, SSE_PMADD>;
5489}
5490defm PMULHRSW    : SS3I_binop_rm_int<0x0B, "pmulhrsw",
5491                                     int_x86_ssse3_pmul_hr_sw_128,
5492                                     SSE_PMULHRSW>;
5493}
5494
5495//===---------------------------------------------------------------------===//
5496// SSSE3 - Packed Align Instruction Patterns
5497//===---------------------------------------------------------------------===//
5498
5499multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
5500  let neverHasSideEffects = 1 in {
5501  def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
5502      (ins VR128:$src1, VR128:$src2, i8imm:$src3),
5503      !if(Is2Addr,
5504        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5505        !strconcat(asm,
5506                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5507      [], IIC_SSE_PALIGNR>, OpSize;
5508  let mayLoad = 1 in
5509  def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
5510      (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
5511      !if(Is2Addr,
5512        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
5513        !strconcat(asm,
5514                  "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
5515      [], IIC_SSE_PALIGNR>, OpSize;
5516  }
5517}
5518
5519multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> {
5520  let neverHasSideEffects = 1 in {
5521  def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
5522      (ins VR256:$src1, VR256:$src2, i8imm:$src3),
5523      !strconcat(asm,
5524                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
5525      []>, OpSize;
5526  let mayLoad = 1 in
5527  def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
5528      (ins VR256:$src1, i256mem:$src2, i8imm:$src3),
5529      !strconcat(asm,
5530                 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
5531      []>, OpSize;
5532  }
5533}
5534
5535let Predicates = [HasAVX] in
5536  defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
5537let Predicates = [HasAVX2] in
5538  defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V;
5539let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in
5540  defm PALIGN : ssse3_palign<"palignr">;
5541
5542let Predicates = [HasAVX2] in {
5543def : Pat<(v8i32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5544          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
5545def : Pat<(v8f32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5546          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
5547def : Pat<(v16i16 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5548          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
5549def : Pat<(v32i8 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
5550          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
5551}
5552
5553let Predicates = [HasAVX] in {
5554def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
5555          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
5556def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
5557          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
5558def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
5559          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
5560def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
5561          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
5562}
5563
5564let Predicates = [HasSSSE3] in {
5565def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
5566          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
5567def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
5568          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
5569def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
5570          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
5571def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
5572          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
5573}
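
// Note that every X86PAlign pattern above hands the instruction its operands
// in swapped order ($src2 first, then $src1); the node and the instruction
// take their two vector inputs in opposite orders, so the patterns commute
// them when selecting palignr/vpalignr.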
5574
//===---------------------------------------------------------------------===//
// SSE3 - Thread synchronization
//===---------------------------------------------------------------------===//
5578
5579let usesCustomInserter = 1 in {
5580def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
5581                [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
5582                Requires<[HasSSE3]>;
5583def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
5584                [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>,
5585                Requires<[HasSSE3]>;
5586}
5587
5588let Uses = [EAX, ECX, EDX] in
5589def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [], IIC_SSE_MONITOR>,
5590                 TB, Requires<[HasSSE3]>;
5591let Uses = [ECX, EAX] in
5592def MWAITrr   : I<0x01, MRM_C9, (outs), (ins), "mwait", [], IIC_SSE_MWAIT>,
5593                TB, Requires<[HasSSE3]>;
5594
5595def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
5596def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
5597
5598def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>,
5599      Requires<[In32BitMode]>;
5600def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>,
5601      Requires<[In64BitMode]>;
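
// The aliases above exist only so the assembler accepts the conventional
// explicit-register spellings; monitor and mwait read their operands
// implicitly (see the Uses lists), so both spellings encode the same
// operand-less instructions.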
5602
5603//===----------------------------------------------------------------------===//
5604// SSE4.1 - Packed Move with Sign/Zero Extend
5605//===----------------------------------------------------------------------===//
5606
5607multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
5608  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
5609                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5610                 [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
5611
5612  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
5613                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5614       [(set VR128:$dst,
5615         (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
5616       OpSize;
5617}
5618
5619multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr,
5620                                 Intrinsic IntId> {
5621  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
5622                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5623                  [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
5624
5625  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
5626                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5627                  [(set VR256:$dst, (IntId (load addr:$src)))]>, OpSize;
5628}
5629
5630let Predicates = [HasAVX] in {
5631defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
5632                                     VEX;
5633defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
5634                                     VEX;
5635defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>,
5636                                     VEX;
5637defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>,
5638                                     VEX;
5639defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>,
5640                                     VEX;
5641defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>,
5642                                     VEX;
5643}
5644
5645let Predicates = [HasAVX2] in {
5646defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw",
5647                                        int_x86_avx2_pmovsxbw>, VEX;
5648defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd",
5649                                        int_x86_avx2_pmovsxwd>, VEX;
5650defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq",
5651                                        int_x86_avx2_pmovsxdq>, VEX;
5652defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw",
5653                                        int_x86_avx2_pmovzxbw>, VEX;
5654defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd",
5655                                        int_x86_avx2_pmovzxwd>, VEX;
5656defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq",
5657                                        int_x86_avx2_pmovzxdq>, VEX;
5658}
5659
5660defm PMOVSXBW   : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
5661defm PMOVSXWD   : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
5662defm PMOVSXDQ   : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
5663defm PMOVZXBW   : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
5664defm PMOVZXWD   : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
5665defm PMOVZXDQ   : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
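
// A note on the rm forms selected above and in the patterns below: pmovsxbw
// and friends read only the low 64 bits of a memory source, so the multiclass
// matches a plain i64 load wrapped as (bitconvert (v2i64 (scalar_to_vector
// ...))); e.g. 'pmovsxbw (%rdi), %xmm0' sign-extends eight bytes into eight
// words.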
5666
5667let Predicates = [HasAVX] in {
5668  // Common patterns involving scalar load.
5669  def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
5670            (VPMOVSXBWrm addr:$src)>;
5671  def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
5672            (VPMOVSXBWrm addr:$src)>;
5673
5674  def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
5675            (VPMOVSXWDrm addr:$src)>;
5676  def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
5677            (VPMOVSXWDrm addr:$src)>;
5678
5679  def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
5680            (VPMOVSXDQrm addr:$src)>;
5681  def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
5682            (VPMOVSXDQrm addr:$src)>;
5683
5684  def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
5685            (VPMOVZXBWrm addr:$src)>;
5686  def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
5687            (VPMOVZXBWrm addr:$src)>;
5688
5689  def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
5690            (VPMOVZXWDrm addr:$src)>;
5691  def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
5692            (VPMOVZXWDrm addr:$src)>;
5693
5694  def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
5695            (VPMOVZXDQrm addr:$src)>;
5696  def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
5697            (VPMOVZXDQrm addr:$src)>;
5698}
5699
5700let Predicates = [HasSSE41] in {
5701  // Common patterns involving scalar load.
5702  def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
5703            (PMOVSXBWrm addr:$src)>;
5704  def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
5705            (PMOVSXBWrm addr:$src)>;
5706
5707  def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
5708            (PMOVSXWDrm addr:$src)>;
5709  def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
5710            (PMOVSXWDrm addr:$src)>;
5711
5712  def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
5713            (PMOVSXDQrm addr:$src)>;
5714  def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
5715            (PMOVSXDQrm addr:$src)>;
5716
5717  def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
5718            (PMOVZXBWrm addr:$src)>;
5719  def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
5720            (PMOVZXBWrm addr:$src)>;
5721
5722  def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
5723            (PMOVZXWDrm addr:$src)>;
5724  def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
5725            (PMOVZXWDrm addr:$src)>;
5726
5727  def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
5728            (PMOVZXDQrm addr:$src)>;
5729  def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
5730            (PMOVZXDQrm addr:$src)>;
5731}
5732
5733let Predicates = [HasAVX] in {
5734def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
5735def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
5736}
5737
5738let Predicates = [HasSSE41] in {
5739def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
5740def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
5741}
5742
5743
5744multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
5745  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
5746                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5747                 [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
5748
5749  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
5750                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5751       [(set VR128:$dst,
5752         (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
5753          OpSize;
5754}
5755
5756multiclass SS41I_binop_rm_int8_y<bits<8> opc, string OpcodeStr,
5757                                 Intrinsic IntId> {
5758  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
5759                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5760                  [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
5761
5762  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i32mem:$src),
5763                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5764       [(set VR256:$dst,
5765         (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
5766          OpSize;
5767}
5768
5769let Predicates = [HasAVX] in {
5770defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
5771                                     VEX;
5772defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>,
5773                                     VEX;
5774defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>,
5775                                     VEX;
5776defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>,
5777                                     VEX;
5778}
5779
5780let Predicates = [HasAVX2] in {
5781defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd",
5782                                       int_x86_avx2_pmovsxbd>, VEX;
5783defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq",
5784                                       int_x86_avx2_pmovsxwq>, VEX;
5785defm VPMOVZXBD : SS41I_binop_rm_int8_y<0x31, "vpmovzxbd",
5786                                       int_x86_avx2_pmovzxbd>, VEX;
5787defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq",
5788                                       int_x86_avx2_pmovzxwq>, VEX;
5789}
5790
5791defm PMOVSXBD   : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
5792defm PMOVSXWQ   : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
5793defm PMOVZXBD   : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
5794defm PMOVZXWQ   : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
5795
5796let Predicates = [HasAVX] in {
5797  // Common patterns involving scalar load
5798  def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
5799            (VPMOVSXBDrm addr:$src)>;
5800  def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
5801            (VPMOVSXWQrm addr:$src)>;
5802
5803  def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
5804            (VPMOVZXBDrm addr:$src)>;
5805  def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
5806            (VPMOVZXWQrm addr:$src)>;
5807}
5808
5809let Predicates = [HasSSE41] in {
5810  // Common patterns involving scalar load
5811  def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
5812            (PMOVSXBDrm addr:$src)>;
5813  def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
5814            (PMOVSXWQrm addr:$src)>;
5815
5816  def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
5817            (PMOVZXBDrm addr:$src)>;
5818  def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
5819            (PMOVZXWQrm addr:$src)>;
5820}
5821
5822multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
5823  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
5824                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5825                 [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
5826
  // Expecting an i16 load any-extended to an i32 value.
5828  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src),
5829                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5830                 [(set VR128:$dst, (IntId (bitconvert
5831                     (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>,
5832                 OpSize;
5833}
5834
5835multiclass SS41I_binop_rm_int4_y<bits<8> opc, string OpcodeStr,
5836                                 Intrinsic IntId> {
5837  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
5838                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5839                 [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
5840
  // Expecting an i16 load any-extended to an i32 value.
5842  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i16mem:$src),
5843                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5844                  [(set VR256:$dst, (IntId (bitconvert
5845                      (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
5846                  OpSize;
5847}
5848
5849let Predicates = [HasAVX] in {
5850defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
5851                                     VEX;
5852defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
5853                                     VEX;
5854}
5855let Predicates = [HasAVX2] in {
5856defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq",
5857                                       int_x86_avx2_pmovsxbq>, VEX;
5858defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
5859                                       int_x86_avx2_pmovzxbq>, VEX;
5860}
5861defm PMOVSXBQ   : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
5862defm PMOVZXBQ   : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
5863
5864let Predicates = [HasAVX] in {
5865  // Common patterns involving scalar load
5866  def : Pat<(int_x86_sse41_pmovsxbq
5867              (bitconvert (v4i32 (X86vzmovl
5868                            (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
5869            (VPMOVSXBQrm addr:$src)>;
5870
5871  def : Pat<(int_x86_sse41_pmovzxbq
5872              (bitconvert (v4i32 (X86vzmovl
5873                            (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
5874            (VPMOVZXBQrm addr:$src)>;
5875}
5876
5877let Predicates = [HasSSE41] in {
5878  // Common patterns involving scalar load
5879  def : Pat<(int_x86_sse41_pmovsxbq
5880              (bitconvert (v4i32 (X86vzmovl
5881                            (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
5882            (PMOVSXBQrm addr:$src)>;
5883
5884  def : Pat<(int_x86_sse41_pmovzxbq
5885              (bitconvert (v4i32 (X86vzmovl
5886                            (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
5887            (PMOVZXBQrm addr:$src)>;
5888}
5889
5890//===----------------------------------------------------------------------===//
5891// SSE4.1 - Extract Instructions
5892//===----------------------------------------------------------------------===//
5893
/// SS41I_extract8 - SSE 4.1 extract 8 bits to a 32-bit reg or 8-bit mem
5895multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
5896  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
5897                 (ins VR128:$src1, i32i8imm:$src2),
5898                 !strconcat(OpcodeStr,
5899                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5900                 [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
5901                 OpSize;
5902  let neverHasSideEffects = 1, mayStore = 1 in
5903  def mr : SS4AIi8<opc, MRMDestMem, (outs),
5904                 (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
5905                 !strconcat(OpcodeStr,
5906                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5907                 []>, OpSize;
5908// FIXME:
5909// There's an AssertZext in the way of writing the store pattern
5910// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
5911}
5912
5913let Predicates = [HasAVX] in {
5914  defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
5915  def  VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst),
5916         (ins VR128:$src1, i32i8imm:$src2),
5917         "vpextrb\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX;
5918}
5919
5920defm PEXTRB      : SS41I_extract8<0x14, "pextrb">;
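
// pextrb zero-extends the selected byte into the full 32-bit (or, with the
// VEX rr64 form, 64-bit) destination, which is why the rr pattern produces a
// GR32 result and why the AssertZext mentioned in the FIXME gets in the way
// of a direct truncating-store pattern.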
5921
5922
5923/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
5924multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
5925  let neverHasSideEffects = 1, mayStore = 1 in
5926  def mr : SS4AIi8<opc, MRMDestMem, (outs),
5927                 (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
5928                 !strconcat(OpcodeStr,
5929                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5930                 []>, OpSize;
5931// FIXME:
5932// There's an AssertZext in the way of writing the store pattern
// (store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), imm:$src2))), addr:$dst)
5934}
5935
5936let Predicates = [HasAVX] in
5937  defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
5938
5939defm PEXTRW      : SS41I_extract16<0x15, "pextrw">;
5940
5941
5942/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
5943multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
5944  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
5945                 (ins VR128:$src1, i32i8imm:$src2),
5946                 !strconcat(OpcodeStr,
5947                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5948                 [(set GR32:$dst,
5949                  (extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
5950  def mr : SS4AIi8<opc, MRMDestMem, (outs),
5951                 (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
5952                 !strconcat(OpcodeStr,
5953                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5954                 [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
5955                          addr:$dst)]>, OpSize;
5956}
5957
5958let Predicates = [HasAVX] in
5959  defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
5960
5961defm PEXTRD      : SS41I_extract32<0x16, "pextrd">;
5962
/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
5964multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
5965  def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
5966                 (ins VR128:$src1, i32i8imm:$src2),
5967                 !strconcat(OpcodeStr,
5968                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5969                 [(set GR64:$dst,
5970                  (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
5971  def mr : SS4AIi8<opc, MRMDestMem, (outs),
5972                 (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
5973                 !strconcat(OpcodeStr,
5974                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5975                 [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
5976                          addr:$dst)]>, OpSize, REX_W;
5977}
5978
5979let Predicates = [HasAVX] in
5980  defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
5981
5982defm PEXTRQ      : SS41I_extract64<0x16, "pextrq">;
5983
5984/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
5985/// destination
5986multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
5987  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
5988                 (ins VR128:$src1, i32i8imm:$src2),
5989                 !strconcat(OpcodeStr,
5990                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5991                 [(set GR32:$dst,
5992                    (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
5993           OpSize;
5994  def mr : SS4AIi8<opc, MRMDestMem, (outs),
5995                 (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
5996                 !strconcat(OpcodeStr,
5997                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5998                 [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
5999                          addr:$dst)]>, OpSize;
6000}
6001
6002let ExeDomain = SSEPackedSingle in {
6003  let Predicates = [HasAVX] in {
6004    defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
6005    def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
6006                    (ins VR128:$src1, i32i8imm:$src2),
                    "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6008                    []>, OpSize, VEX;
6009  }
6010  defm EXTRACTPS   : SS41I_extractf32<0x17, "extractps">;
6011}
6012
6013// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
6014def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
6015                                              imm:$src2))),
6016                 addr:$dst),
6017          (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
6018          Requires<[HasAVX]>;
6019def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
6020                                              imm:$src2))),
6021                 addr:$dst),
6022          (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
6023          Requires<[HasSSE41]>;
6024
6025//===----------------------------------------------------------------------===//
6026// SSE4.1 - Insert Instructions
6027//===----------------------------------------------------------------------===//
6028
6029multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
6030  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
6031      (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
6032      !if(Is2Addr,
6033        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6034        !strconcat(asm,
6035                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6036      [(set VR128:$dst,
6037        (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
6038  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
6039      (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
6040      !if(Is2Addr,
6041        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6042        !strconcat(asm,
6043                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6044      [(set VR128:$dst,
6045        (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
6046                   imm:$src3))]>, OpSize;
6047}
6048
6049let Predicates = [HasAVX] in
6050  defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
6051let Constraints = "$src1 = $dst" in
6052  defm PINSRB  : SS41I_insert8<0x20, "pinsrb">;
6053
6054multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
6055  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
6056      (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
6057      !if(Is2Addr,
6058        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6059        !strconcat(asm,
6060                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6061      [(set VR128:$dst,
6062        (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
6063      OpSize;
6064  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
6065      (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
6066      !if(Is2Addr,
6067        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6068        !strconcat(asm,
6069                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6070      [(set VR128:$dst,
6071        (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
6072                          imm:$src3)))]>, OpSize;
6073}
6074
6075let Predicates = [HasAVX] in
6076  defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
6077let Constraints = "$src1 = $dst" in
6078  defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
6079
6080multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
6081  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
6082      (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
6083      !if(Is2Addr,
6084        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6085        !strconcat(asm,
6086                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6087      [(set VR128:$dst,
6088        (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
6089      OpSize;
6090  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
6091      (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
6092      !if(Is2Addr,
6093        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6094        !strconcat(asm,
6095                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6096      [(set VR128:$dst,
6097        (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
6098                          imm:$src3)))]>, OpSize;
6099}
6100
6101let Predicates = [HasAVX] in
6102  defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
6103let Constraints = "$src1 = $dst" in
6104  defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
6105
// insertps has a few different modes; the first two below are optimized
// inserts that won't zero arbitrary elements in the destination vector. The
// next one matches the intrinsic and may zero arbitrary elements in the
// target vector.
6110multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
6111  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
6112      (ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
6113      !if(Is2Addr,
6114        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6115        !strconcat(asm,
6116                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6117      [(set VR128:$dst,
6118        (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
6119      OpSize;
6120  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
6121      (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
6122      !if(Is2Addr,
6123        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6124        !strconcat(asm,
6125                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6126      [(set VR128:$dst,
6127        (X86insrtps VR128:$src1,
6128                   (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
6129                    imm:$src3))]>, OpSize;
6130}
6131
6132let ExeDomain = SSEPackedSingle in {
6133  let Predicates = [HasAVX] in
6134    defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
6135  let Constraints = "$src1 = $dst" in
6136    defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
6137}
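
// For reference when reading the insertps patterns: the 8-bit immediate packs
// three fields, bits [7:6] selecting the source element (register form only),
// bits [5:4] the destination element, and bits [3:0] a zero mask applied to
// the result, which is the "may zero arbitrary elements" case noted above.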
6138
6139//===----------------------------------------------------------------------===//
6140// SSE4.1 - Round Instructions
6141//===----------------------------------------------------------------------===//
6142
6143multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
6144                            X86MemOperand x86memop, RegisterClass RC,
6145                            PatFrag mem_frag32, PatFrag mem_frag64,
6146                            Intrinsic V4F32Int, Intrinsic V2F64Int> {
6147let ExeDomain = SSEPackedSingle in {
  // Vector intrinsic operation, reg
6150  def PSr : SS4AIi8<opcps, MRMSrcReg,
6151                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
6152                    !strconcat(OpcodeStr,
6153                    "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6154                    [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>,
6155                    OpSize;
6156
6157  // Vector intrinsic operation, mem
6158  def PSm : SS4AIi8<opcps, MRMSrcMem,
6159                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
6160                    !strconcat(OpcodeStr,
6161                    "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6162                    [(set RC:$dst,
6163                          (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
6164                    OpSize;
6165} // ExeDomain = SSEPackedSingle
6166
6167let ExeDomain = SSEPackedDouble in {
6168  // Vector intrinsic operation, reg
6169  def PDr : SS4AIi8<opcpd, MRMSrcReg,
6170                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
6171                    !strconcat(OpcodeStr,
6172                    "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6173                    [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>,
6174                    OpSize;
6175
6176  // Vector intrinsic operation, mem
6177  def PDm : SS4AIi8<opcpd, MRMSrcMem,
6178                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
6179                    !strconcat(OpcodeStr,
6180                    "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6181                    [(set RC:$dst,
6182                          (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
6183                    OpSize;
6184} // ExeDomain = SSEPackedDouble
6185}
6186
6187multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
6188                            string OpcodeStr,
6189                            Intrinsic F32Int,
6190                            Intrinsic F64Int, bit Is2Addr = 1> {
6191let ExeDomain = GenericDomain in {
6192  // Operation, reg.
6193  def SSr : SS4AIi8<opcss, MRMSrcReg,
6194      (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3),
6195      !if(Is2Addr,
6196          !strconcat(OpcodeStr,
6197              "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6198          !strconcat(OpcodeStr,
6199              "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6200      []>, OpSize;
6201
6202  // Intrinsic operation, reg.
6203  def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
6204        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
6205        !if(Is2Addr,
6206            !strconcat(OpcodeStr,
6207                "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6208            !strconcat(OpcodeStr,
6209                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6210        [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
6211        OpSize;
6212
6213  // Intrinsic operation, mem.
6214  def SSm : SS4AIi8<opcss, MRMSrcMem,
6215        (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
6216        !if(Is2Addr,
6217            !strconcat(OpcodeStr,
6218                "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6219            !strconcat(OpcodeStr,
6220                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6221        [(set VR128:$dst,
6222             (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
6223        OpSize;
6224
6225  // Operation, reg.
6226  def SDr : SS4AIi8<opcsd, MRMSrcReg,
6227        (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3),
6228        !if(Is2Addr,
6229            !strconcat(OpcodeStr,
6230                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6231            !strconcat(OpcodeStr,
6232                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6233        []>, OpSize;
6234
6235  // Intrinsic operation, reg.
6236  def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
6237        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
6238        !if(Is2Addr,
6239            !strconcat(OpcodeStr,
6240                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6241            !strconcat(OpcodeStr,
6242                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6243        [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
6244        OpSize;
6245
6246  // Intrinsic operation, mem.
6247  def SDm : SS4AIi8<opcsd, MRMSrcMem,
6248        (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
6249        !if(Is2Addr,
6250            !strconcat(OpcodeStr,
6251                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6252            !strconcat(OpcodeStr,
6253                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6254        [(set VR128:$dst,
6255              (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
6256        OpSize;
6257} // ExeDomain = GenericDomain
6258}
6259
6260// FP round - roundss, roundps, roundsd, roundpd
6261let Predicates = [HasAVX] in {
6262  // Intrinsic form
6263  defm VROUND  : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
6264                                  memopv4f32, memopv2f64,
6265                                  int_x86_sse41_round_ps,
6266                                  int_x86_sse41_round_pd>, VEX;
6267  defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
6268                                  memopv8f32, memopv4f64,
6269                                  int_x86_avx_round_ps_256,
6270                                  int_x86_avx_round_pd_256>, VEX;
6271  defm VROUND  : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
6272                                  int_x86_sse41_round_ss,
6273                                  int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
6274
6275  def : Pat<(ffloor FR32:$src),
6276            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
6277  def : Pat<(f64 (ffloor FR64:$src)),
6278            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
6279  def : Pat<(f32 (fnearbyint FR32:$src)),
6280            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
6281  def : Pat<(f64 (fnearbyint FR64:$src)),
6282            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
6283  def : Pat<(f32 (fceil FR32:$src)),
6284            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
6285  def : Pat<(f64 (fceil FR64:$src)),
6286            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
6287  def : Pat<(f32 (frint FR32:$src)),
6288            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
6289  def : Pat<(f64 (frint FR64:$src)),
6290            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
6291  def : Pat<(f32 (ftrunc FR32:$src)),
6292            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
6293  def : Pat<(f64 (ftrunc FR64:$src)),
6294            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
6295}
6296
6297defm ROUND  : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
6298                               memopv4f32, memopv2f64,
6299                               int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
6300let Constraints = "$src1 = $dst" in
6301defm ROUND  : sse41_fp_binop_rm<0x0A, 0x0B, "round",
6302                               int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
6303
6304def : Pat<(ffloor FR32:$src),
6305          (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
6306def : Pat<(f64 (ffloor FR64:$src)),
6307          (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
6308def : Pat<(f32 (fnearbyint FR32:$src)),
6309          (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
6310def : Pat<(f64 (fnearbyint FR64:$src)),
6311          (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
6312def : Pat<(f32 (fceil FR32:$src)),
6313          (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
6314def : Pat<(f64 (fceil FR64:$src)),
6315          (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
6316def : Pat<(f32 (frint FR32:$src)),
6317          (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
6318def : Pat<(f64 (frint FR64:$src)),
6319          (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
6320def : Pat<(f32 (ftrunc FR32:$src)),
6321          (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
6322def : Pat<(f64 (ftrunc FR64:$src)),
6323          (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
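
// The immediates in the patterns above (and in the AVX block earlier) follow
// the roundss/roundsd control byte: 0x1 rounds toward -infinity (ffloor), 0x2
// toward +infinity (fceil), 0x3 toward zero (ftrunc), 0x4 uses the current
// MXCSR rounding mode (frint), and 0xC additionally suppresses the precision
// exception (fnearbyint).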
6324
6325//===----------------------------------------------------------------------===//
6326// SSE4.1 - Packed Bit Test
6327//===----------------------------------------------------------------------===//
6328
// The ptest instruction: we lower to this in X86ISelLowering, primarily from
// the Intel intrinsic that corresponds to it.
6331let Defs = [EFLAGS], Predicates = [HasAVX] in {
6332def VPTESTrr  : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
6333                "vptest\t{$src2, $src1|$src1, $src2}",
6334                [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
6335                OpSize, VEX;
6336def VPTESTrm  : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
6337                "vptest\t{$src2, $src1|$src1, $src2}",
6338                [(set EFLAGS,(X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
6339                OpSize, VEX;
6340
6341def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
6342                "vptest\t{$src2, $src1|$src1, $src2}",
6343                [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
6344                OpSize, VEX;
6345def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
6346                "vptest\t{$src2, $src1|$src1, $src2}",
6347                [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
6348                OpSize, VEX;
6349}
6350
6351let Defs = [EFLAGS] in {
6352def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
6353              "ptest\t{$src2, $src1|$src1, $src2}",
6354              [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
6355              OpSize;
6356def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
6357              "ptest\t{$src2, $src1|$src1, $src2}",
6358              [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
6359              OpSize;
6360}
6361
6362// The bit test instructions below are AVX only
6363multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
6364                       X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
6365  def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
6366            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
6367            [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX;
6368  def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
6369            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
6370            [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
6371            OpSize, VEX;
6372}
6373
6374let Defs = [EFLAGS], Predicates = [HasAVX] in {
6375let ExeDomain = SSEPackedSingle in {
6376defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
6377defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
6378}
6379let ExeDomain = SSEPackedDouble in {
6380defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
6381defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
6382}
6383}
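
// ptest ANDs the full 128-bit values when computing ZF and CF, while
// vtestps/vtestpd look only at the sign bit of each packed element; in either
// case the defs above produce no register result and are modeled purely as
// writes to EFLAGS.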
6384
6385//===----------------------------------------------------------------------===//
6386// SSE4.1 - Misc Instructions
6387//===----------------------------------------------------------------------===//
6388
6389let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
6390  def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
6391                     "popcnt{w}\t{$src, $dst|$dst, $src}",
6392                     [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>,
6393                     OpSize, XS;
6394  def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
6395                     "popcnt{w}\t{$src, $dst|$dst, $src}",
6396                     [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
6397                      (implicit EFLAGS)]>, OpSize, XS;
6398
6399  def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
6400                     "popcnt{l}\t{$src, $dst|$dst, $src}",
6401                     [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>,
6402                     XS;
6403  def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
6404                     "popcnt{l}\t{$src, $dst|$dst, $src}",
6405                     [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
6406                      (implicit EFLAGS)]>, XS;
6407
6408  def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
6409                      "popcnt{q}\t{$src, $dst|$dst, $src}",
6410                      [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>,
6411                      XS;
6412  def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
6413                      "popcnt{q}\t{$src, $dst|$dst, $src}",
6414                      [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
6415                       (implicit EFLAGS)]>, XS;
6416}
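
// The (implicit EFLAGS) in each popcnt pattern models the flag behaviour:
// popcnt clears OF/SF/AF/CF/PF and sets ZF exactly when the source is zero,
// even though only the GR destination is normally consumed.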
6417
6418
6419
6420// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
6421multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
6422                                 Intrinsic IntId128> {
6423  def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
6424                    (ins VR128:$src),
6425                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
6426                    [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize;
6427  def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
6428                     (ins i128mem:$src),
6429                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
6430                     [(set VR128:$dst,
6431                       (IntId128
6432                        (bitconvert (memopv2i64 addr:$src))))]>, OpSize;
6433}
6434
6435let Predicates = [HasAVX] in
6436defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
6437                                         int_x86_sse41_phminposuw>, VEX;
6438defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
6439                                         int_x86_sse41_phminposuw>;
6440
6441/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
6442multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
6443                              Intrinsic IntId128, bit Is2Addr = 1> {
6444  let isCommutable = 1 in
6445  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
6446       (ins VR128:$src1, VR128:$src2),
6447       !if(Is2Addr,
6448           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
6449           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
6450       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize;
6451  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
6452       (ins VR128:$src1, i128mem:$src2),
6453       !if(Is2Addr,
6454           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
6455           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
6456       [(set VR128:$dst,
6457         (IntId128 VR128:$src1,
6458          (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
6459}
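
// The Is2Addr flag above only switches the assembly string: the default of 1
// gives the two-operand SSE form (the destination is tied to $src1 by the
// "$src1 = $dst" constraint at the instantiation site below), while Is2Addr=0
// gives the three-operand AVX form. A sketch of the two strings for pminsb:
//   "pminsb\t{$src2, $dst|$dst, $src2}"
//   "vpminsb\t{$src2, $src1, $dst|$dst, $src1, $src2}"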
6460
/// SS41I_binop_rm_int_y - 256-bit (AVX2) version of the simple SSE 4.1
/// binary operator above.
6462multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
6463                                Intrinsic IntId256> {
6464  let isCommutable = 1 in
6465  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst),
6466       (ins VR256:$src1, VR256:$src2),
6467       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6468       [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>, OpSize;
6469  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst),
6470       (ins VR256:$src1, i256mem:$src2),
6471       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6472       [(set VR256:$dst,
6473         (IntId256 VR256:$src1,
6474          (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
6475}
6476
6477let Predicates = [HasAVX] in {
6478  let isCommutable = 0 in
6479  defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
6480                                                         0>, VEX_4V;
6481  defm VPMINSB   : SS41I_binop_rm_int<0x38, "vpminsb",   int_x86_sse41_pminsb,
6482                                                         0>, VEX_4V;
6483  defm VPMINSD   : SS41I_binop_rm_int<0x39, "vpminsd",   int_x86_sse41_pminsd,
6484                                                         0>, VEX_4V;
6485  defm VPMINUD   : SS41I_binop_rm_int<0x3B, "vpminud",   int_x86_sse41_pminud,
6486                                                         0>, VEX_4V;
6487  defm VPMINUW   : SS41I_binop_rm_int<0x3A, "vpminuw",   int_x86_sse41_pminuw,
6488                                                         0>, VEX_4V;
6489  defm VPMAXSB   : SS41I_binop_rm_int<0x3C, "vpmaxsb",   int_x86_sse41_pmaxsb,
6490                                                         0>, VEX_4V;
6491  defm VPMAXSD   : SS41I_binop_rm_int<0x3D, "vpmaxsd",   int_x86_sse41_pmaxsd,
6492                                                         0>, VEX_4V;
6493  defm VPMAXUD   : SS41I_binop_rm_int<0x3F, "vpmaxud",   int_x86_sse41_pmaxud,
6494                                                         0>, VEX_4V;
6495  defm VPMAXUW   : SS41I_binop_rm_int<0x3E, "vpmaxuw",   int_x86_sse41_pmaxuw,
6496                                                         0>, VEX_4V;
6497  defm VPMULDQ   : SS41I_binop_rm_int<0x28, "vpmuldq",   int_x86_sse41_pmuldq,
6498                                                         0>, VEX_4V;
6499}
6500
6501let Predicates = [HasAVX2] in {
6502  let isCommutable = 0 in
6503  defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
6504                                        int_x86_avx2_packusdw>, VEX_4V;
6505  defm VPMINSB   : SS41I_binop_rm_int_y<0x38, "vpminsb",
6506                                        int_x86_avx2_pmins_b>, VEX_4V;
6507  defm VPMINSD   : SS41I_binop_rm_int_y<0x39, "vpminsd",
6508                                        int_x86_avx2_pmins_d>, VEX_4V;
6509  defm VPMINUD   : SS41I_binop_rm_int_y<0x3B, "vpminud",
6510                                        int_x86_avx2_pminu_d>, VEX_4V;
6511  defm VPMINUW   : SS41I_binop_rm_int_y<0x3A, "vpminuw",
6512                                        int_x86_avx2_pminu_w>, VEX_4V;
6513  defm VPMAXSB   : SS41I_binop_rm_int_y<0x3C, "vpmaxsb",
6514                                        int_x86_avx2_pmaxs_b>, VEX_4V;
6515  defm VPMAXSD   : SS41I_binop_rm_int_y<0x3D, "vpmaxsd",
6516                                        int_x86_avx2_pmaxs_d>, VEX_4V;
6517  defm VPMAXUD   : SS41I_binop_rm_int_y<0x3F, "vpmaxud",
6518                                        int_x86_avx2_pmaxu_d>, VEX_4V;
6519  defm VPMAXUW   : SS41I_binop_rm_int_y<0x3E, "vpmaxuw",
6520                                        int_x86_avx2_pmaxu_w>, VEX_4V;
6521  defm VPMULDQ   : SS41I_binop_rm_int_y<0x28, "vpmuldq",
6522                                        int_x86_avx2_pmul_dq>, VEX_4V;
6523}
6524
6525let Constraints = "$src1 = $dst" in {
6526  let isCommutable = 0 in
6527  defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
6528  defm PMINSB   : SS41I_binop_rm_int<0x38, "pminsb",   int_x86_sse41_pminsb>;
6529  defm PMINSD   : SS41I_binop_rm_int<0x39, "pminsd",   int_x86_sse41_pminsd>;
6530  defm PMINUD   : SS41I_binop_rm_int<0x3B, "pminud",   int_x86_sse41_pminud>;
6531  defm PMINUW   : SS41I_binop_rm_int<0x3A, "pminuw",   int_x86_sse41_pminuw>;
6532  defm PMAXSB   : SS41I_binop_rm_int<0x3C, "pmaxsb",   int_x86_sse41_pmaxsb>;
6533  defm PMAXSD   : SS41I_binop_rm_int<0x3D, "pmaxsd",   int_x86_sse41_pmaxsd>;
6534  defm PMAXUD   : SS41I_binop_rm_int<0x3F, "pmaxud",   int_x86_sse41_pmaxud>;
6535  defm PMAXUW   : SS41I_binop_rm_int<0x3E, "pmaxuw",   int_x86_sse41_pmaxuw>;
6536  defm PMULDQ   : SS41I_binop_rm_int<0x28, "pmuldq",   int_x86_sse41_pmuldq>;
6537}
6538
6539/// SS48I_binop_rm - Simple SSE41 binary operator.
6540multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6541                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
6542                          X86MemOperand x86memop, bit Is2Addr = 1> {
6543  let isCommutable = 1 in
6544  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
6545       (ins RC:$src1, RC:$src2),
6546       !if(Is2Addr,
6547           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
6548           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
6549       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, OpSize;
6550  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
6551       (ins RC:$src1, x86memop:$src2),
6552       !if(Is2Addr,
6553           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
6554           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
6555       [(set RC:$dst,
6556         (OpVT (OpNode RC:$src1,
6557          (bitconvert (memop_frag addr:$src2)))))]>, OpSize;
6558}
6559
6560let Predicates = [HasAVX] in {
6561  defm VPMULLD  : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
6562                                memopv2i64, i128mem, 0>, VEX_4V;
6563  defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
6564                                 memopv2i64, i128mem, 0>, VEX_4V;
6565}
6566let Predicates = [HasAVX2] in {
6567  defm VPMULLDY  : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
6568                                  memopv4i64, i256mem, 0>, VEX_4V;
6569  defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
6570                                  memopv4i64, i256mem, 0>, VEX_4V;
6571}
6572
6573let Constraints = "$src1 = $dst" in {
6574  defm PMULLD  : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
6575                                memopv2i64, i128mem>;
6576  defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
6577                                memopv2i64, i128mem>;
6578}
6579
6580/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
6581multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
6582                 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
6583                 X86MemOperand x86memop, bit Is2Addr = 1> {
6584  let isCommutable = 1 in
6585  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
6586        (ins RC:$src1, RC:$src2, u32u8imm:$src3),
6587        !if(Is2Addr,
6588            !strconcat(OpcodeStr,
6589                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6590            !strconcat(OpcodeStr,
6591                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6592        [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
6593        OpSize;
6594  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
6595        (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
6596        !if(Is2Addr,
6597            !strconcat(OpcodeStr,
6598                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6599            !strconcat(OpcodeStr,
6600                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
6601        [(set RC:$dst,
6602          (IntId RC:$src1,
6603           (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
6604        OpSize;
6605}
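
// Compared with SS41I_binop_rm_int, this multiclass adds an 8-bit immediate
// control operand ($src3), giving rri/rmi forms. For the BLENDPS
// instantiations below, for instance, that immediate is the per-element blend
// mask passed straight through to the intrinsic:
//   (set VR128:$dst, (int_x86_sse41_blendps VR128:$src1, VR128:$src2,
//                                           imm:$src3))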
6606
6607let Predicates = [HasAVX] in {
6608  let isCommutable = 0 in {
6609    let ExeDomain = SSEPackedSingle in {
6610    defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
6611                                        VR128, memopv4f32, i128mem, 0>, VEX_4V;
6612    defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
6613              int_x86_avx_blend_ps_256, VR256, memopv8f32, i256mem, 0>, VEX_4V;
6614    }
6615    let ExeDomain = SSEPackedDouble in {
6616    defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
6617                                        VR128, memopv2f64, i128mem, 0>, VEX_4V;
6618    defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
6619              int_x86_avx_blend_pd_256, VR256, memopv4f64, i256mem, 0>, VEX_4V;
6620    }
6621  defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
6622                                      VR128, memopv2i64, i128mem, 0>, VEX_4V;
6623  defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
6624                                      VR128, memopv2i64, i128mem, 0>, VEX_4V;
6625  }
6626  let ExeDomain = SSEPackedSingle in
6627  defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
6628                                   VR128, memopv4f32, i128mem, 0>, VEX_4V;
6629  let ExeDomain = SSEPackedDouble in
6630  defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
6631                                   VR128, memopv2f64, i128mem, 0>, VEX_4V;
6632  let ExeDomain = SSEPackedSingle in
6633  defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
6634                                   VR256, memopv8f32, i256mem, 0>, VEX_4V;
6635}
6636
6637let Predicates = [HasAVX2] in {
6638  let isCommutable = 0 in {
6639  defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
6640                                       VR256, memopv4i64, i256mem, 0>, VEX_4V;
6641  defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
6642                                       VR256, memopv4i64, i256mem, 0>, VEX_4V;
6643  }
6644}
6645
6646let Constraints = "$src1 = $dst" in {
6647  let isCommutable = 0 in {
6648  let ExeDomain = SSEPackedSingle in
6649  defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
6650                                     VR128, memopv4f32, i128mem>;
6651  let ExeDomain = SSEPackedDouble in
6652  defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
6653                                     VR128, memopv2f64, i128mem>;
6654  defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
6655                                     VR128, memopv2i64, i128mem>;
6656  defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
6657                                     VR128, memopv2i64, i128mem>;
6658  }
6659  let ExeDomain = SSEPackedSingle in
6660  defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
6661                                  VR128, memopv4f32, i128mem>;
6662  let ExeDomain = SSEPackedDouble in
6663  defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
6664                                  VR128, memopv2f64, i128mem>;
6665}
6666
/// SS41I_quaternary_int_avx - AVX SSE 4.1 operations with four operands
6668multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
6669                                    RegisterClass RC, X86MemOperand x86memop,
6670                                    PatFrag mem_frag, Intrinsic IntId> {
6671  def rr : Ii8<opc, MRMSrcReg, (outs RC:$dst),
6672                  (ins RC:$src1, RC:$src2, RC:$src3),
6673                  !strconcat(OpcodeStr,
6674                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
6675                  [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
6676                  IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
6677
6678  def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
6679                  (ins RC:$src1, x86memop:$src2, RC:$src3),
6680                  !strconcat(OpcodeStr,
6681                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
6682                  [(set RC:$dst,
6683                        (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
6684                               RC:$src3))],
6685                  IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
6686}
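
// A rough note on the encoding: the VEX_I8IMM modifier indicates that the
// extra register source ($src3, the blend mask register) is encoded in the
// high bits of an immediate byte rather than in a ModRM field, which is what
// makes a fourth operand possible here.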
6687
6688let Predicates = [HasAVX] in {
6689let ExeDomain = SSEPackedDouble in {
6690defm VBLENDVPD  : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
6691                                           memopv2f64, int_x86_sse41_blendvpd>;
6692defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
6693                                         memopv4f64, int_x86_avx_blendv_pd_256>;
6694} // ExeDomain = SSEPackedDouble
6695let ExeDomain = SSEPackedSingle in {
6696defm VBLENDVPS  : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
6697                                           memopv4f32, int_x86_sse41_blendvps>;
6698defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
6699                                         memopv8f32, int_x86_avx_blendv_ps_256>;
6700} // ExeDomain = SSEPackedSingle
6701defm VPBLENDVB  : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
6702                                           memopv2i64, int_x86_sse41_pblendvb>;
6703}
6704
6705let Predicates = [HasAVX2] in {
6706defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
6707                                           memopv4i64, int_x86_avx2_pblendvb>;
6708}
6709
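// Note the operand order in the patterns below: in (vselect $mask, $src1,
// $src2) the $src1 value is taken where the mask is set, while (roughly
// speaking) the blendv instructions take their second source where the mask
// bit is set and their first source otherwise, so $src1 and $src2 are swapped
// when mapping onto the machine instruction.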
6710let Predicates = [HasAVX] in {
6711  def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
6712                            (v16i8 VR128:$src2))),
6713            (VPBLENDVBrr VR128:$src2, VR128:$src1, VR128:$mask)>;
6714  def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1),
6715                            (v4i32 VR128:$src2))),
6716            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
6717  def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1),
6718                            (v4f32 VR128:$src2))),
6719            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
6720  def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1),
6721                            (v2i64 VR128:$src2))),
6722            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
6723  def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1),
6724                            (v2f64 VR128:$src2))),
6725            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
6726  def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1),
6727                            (v8i32 VR256:$src2))),
6728            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
6729  def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1),
6730                            (v8f32 VR256:$src2))),
6731            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
6732  def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1),
6733                            (v4i64 VR256:$src2))),
6734            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
6735  def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
6736                            (v4f64 VR256:$src2))),
6737            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
6738
6739  def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2),
6740                               (imm:$mask))),
6741            (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>;
6742  def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2),
6743                               (imm:$mask))),
6744            (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>;
6745
6746  def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
6747                               (imm:$mask))),
6748            (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
6749  def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
6750                               (imm:$mask))),
6751            (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
6752  def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
6753                               (imm:$mask))),
6754            (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
6755}
6756
6757let Predicates = [HasAVX2] in {
6758  def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
6759                            (v32i8 VR256:$src2))),
6760            (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
6761  def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2),
6762                               (imm:$mask))),
6763            (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>;
6764}
6765
6766/// SS41I_ternary_int - SSE 4.1 ternary operator
6767let Uses = [XMM0], Constraints = "$src1 = $dst" in {
6768  multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
6769                               Intrinsic IntId> {
6770    def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
6771                    (ins VR128:$src1, VR128:$src2),
6772                    !strconcat(OpcodeStr,
6773                     "\t{$src2, $dst|$dst, $src2}"),
6774                    [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
6775                    OpSize;
6776
6777    def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
6778                    (ins VR128:$src1, i128mem:$src2),
6779                    !strconcat(OpcodeStr,
6780                     "\t{$src2, $dst|$dst, $src2}"),
6781                    [(set VR128:$dst,
6782                      (IntId VR128:$src1,
6783                       (bitconvert (mem_frag addr:$src2)), XMM0))]>, OpSize;
6784  }
6785}
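
// Unlike the AVX forms above, the SSE (non-AVX) blend instructions take the
// mask implicitly in XMM0, which is why the multiclass is wrapped in
// Uses = [XMM0] and the intrinsic's third operand is pinned to XMM0 in the
// patterns.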
6786
6787let ExeDomain = SSEPackedDouble in
6788defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64,
6789                                  int_x86_sse41_blendvpd>;
6790let ExeDomain = SSEPackedSingle in
6791defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32,
6792                                  int_x86_sse41_blendvps>;
6793defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64,
6794                                  int_x86_sse41_pblendvb>;
6795
6796let Predicates = [HasSSE41] in {
6797  def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
6798                            (v16i8 VR128:$src2))),
6799            (PBLENDVBrr0 VR128:$src2, VR128:$src1)>;
6800  def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1),
6801                            (v4i32 VR128:$src2))),
6802            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
6803  def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1),
6804                            (v4f32 VR128:$src2))),
6805            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
6806  def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1),
6807                            (v2i64 VR128:$src2))),
6808            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
6809  def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
6810                            (v2f64 VR128:$src2))),
6811            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
6812
6813  def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
6814                               (imm:$mask))),
6815            (PBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
6816  def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
6817                               (imm:$mask))),
6818            (BLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
6819  def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
6820                               (imm:$mask))),
6821            (BLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
6822
6823}
6824
6825let Predicates = [HasAVX] in
6826def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
6827                       "vmovntdqa\t{$src, $dst|$dst, $src}",
6828                       [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
6829                       OpSize, VEX;
6830let Predicates = [HasAVX2] in
6831def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
6832                         "vmovntdqa\t{$src, $dst|$dst, $src}",
6833                         [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>,
6834                         OpSize, VEX;
6835def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
6836                       "movntdqa\t{$src, $dst|$dst, $src}",
6837                       [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
6838                       OpSize;
6839
6840//===----------------------------------------------------------------------===//
6841// SSE4.2 - Compare Instructions
6842//===----------------------------------------------------------------------===//
6843
6844/// SS42I_binop_rm - Simple SSE 4.2 binary operator
6845multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6846                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
6847                          X86MemOperand x86memop, bit Is2Addr = 1> {
6848  def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
6849       (ins RC:$src1, RC:$src2),
6850       !if(Is2Addr,
6851           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
6852           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
6853       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
6854       OpSize;
6855  def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
6856       (ins RC:$src1, x86memop:$src2),
6857       !if(Is2Addr,
6858           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
6859           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
6860       [(set RC:$dst,
6861         (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, OpSize;
6862}
6863
6864let Predicates = [HasAVX] in
6865  defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
6866                                 memopv2i64, i128mem, 0>, VEX_4V;
6867
6868let Predicates = [HasAVX2] in
6869  defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
6870                                  memopv4i64, i256mem, 0>, VEX_4V;
6871
6872let Constraints = "$src1 = $dst" in
6873  defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
6874                                memopv2i64, i128mem>;
6875
6876//===----------------------------------------------------------------------===//
6877// SSE4.2 - String/text Processing Instructions
6878//===----------------------------------------------------------------------===//
6879
6880// Packed Compare Implicit Length Strings, Return Mask
6881multiclass pseudo_pcmpistrm<string asm> {
6882  def REG : PseudoI<(outs VR128:$dst),
6883                    (ins VR128:$src1, VR128:$src2, i8imm:$src3),
6884    [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
6885                                                  imm:$src3))]>;
6886  def MEM : PseudoI<(outs VR128:$dst),
6887                    (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
6888    [(set VR128:$dst, (int_x86_sse42_pcmpistrm128
6889                       VR128:$src1, (load addr:$src2), imm:$src3))]>;
6890}
6891
6892let Defs = [EFLAGS], usesCustomInserter = 1 in {
6893  let AddedComplexity = 1 in
6894    defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
6895  defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>;
6896}
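
// The pseudos above carry the actual selection patterns and produce an
// ordinary virtual-register result; usesCustomInserter lets the backend later
// expand them into the real PCMPISTRM instructions defined below, which write
// their result implicitly to XMM0 and therefore have no patterns of their own.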
6897
6898let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in {
6899  def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
6900      (ins VR128:$src1, VR128:$src2, i8imm:$src3),
6901      "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
6902  let mayLoad = 1 in
6903  def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
6904      (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
6905      "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
6906}
6907
6908let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in {
6909  def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
6910      (ins VR128:$src1, VR128:$src2, i8imm:$src3),
6911      "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
6912  let mayLoad = 1 in
6913  def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
6914      (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
6915      "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
6916}
6917
6918// Packed Compare Explicit Length Strings, Return Mask
6919multiclass pseudo_pcmpestrm<string asm> {
6920  def REG : PseudoI<(outs VR128:$dst),
6921                    (ins VR128:$src1, VR128:$src3, i8imm:$src5),
6922    [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
6923                       VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
6924  def MEM : PseudoI<(outs VR128:$dst),
6925                    (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
6926    [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
6927                       VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
6928}
6929
6930let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
6931  let AddedComplexity = 1 in
6932    defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
6933  defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>;
6934}
6935
6936let Predicates = [HasAVX],
6937    Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
6938  def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
6939      (ins VR128:$src1, VR128:$src3, i8imm:$src5),
6940      "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
6941  let mayLoad = 1 in
6942  def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
6943      (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
6944      "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
6945}
6946
6947let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
6948  def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
6949      (ins VR128:$src1, VR128:$src3, i8imm:$src5),
6950      "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
6951  let mayLoad = 1 in
6952  def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
6953      (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
6954      "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
6955}
6956
6957// Packed Compare Implicit Length Strings, Return Index
6958let Defs = [ECX, EFLAGS] in {
6959  multiclass SS42AI_pcmpistri<Intrinsic IntId128, string asm = "pcmpistri"> {
6960    def rr : SS42AI<0x63, MRMSrcReg, (outs),
6961      (ins VR128:$src1, VR128:$src2, i8imm:$src3),
6962      !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
6963      [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
6964       (implicit EFLAGS)]>, OpSize;
6965    def rm : SS42AI<0x63, MRMSrcMem, (outs),
6966      (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
6967      !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
6968      [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
6969       (implicit EFLAGS)]>, OpSize;
6970  }
6971}
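
// The A/C/O/S/Z variants instantiated below all print and encode as plain
// (v)pcmpistri; the separate defs exist only so that each flag-reading
// intrinsic (..._pcmpistria128 and friends) has its own selectable
// instruction, with the flag result itself carried by the implicit EFLAGS
// definition.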
6972
6973let Predicates = [HasAVX] in {
6974defm VPCMPISTRI  : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">,
6975                                    VEX;
6976defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">,
6977                                    VEX;
6978defm VPCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128, "vpcmpistri">,
6979                                    VEX;
6980defm VPCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128, "vpcmpistri">,
6981                                    VEX;
6982defm VPCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128, "vpcmpistri">,
6983                                    VEX;
6984defm VPCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128, "vpcmpistri">,
6985                                    VEX;
6986}
6987
6988defm PCMPISTRI  : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
6989defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
6990defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
6991defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
6992defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
6993defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
6994
6995// Packed Compare Explicit Length Strings, Return Index
6996let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in {
6997  multiclass SS42AI_pcmpestri<Intrinsic IntId128, string asm = "pcmpestri"> {
6998    def rr : SS42AI<0x61, MRMSrcReg, (outs),
6999      (ins VR128:$src1, VR128:$src3, i8imm:$src5),
7000      !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
7001      [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
7002       (implicit EFLAGS)]>, OpSize;
7003    def rm : SS42AI<0x61, MRMSrcMem, (outs),
7004      (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
7005      !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
7006       [(set ECX,
7007             (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)),
7008        (implicit EFLAGS)]>, OpSize;
7009  }
7010}
7011
7012let Predicates = [HasAVX] in {
7013defm VPCMPESTRI  : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">,
7014                                    VEX;
7015defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">,
7016                                    VEX;
7017defm VPCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128, "vpcmpestri">,
7018                                    VEX;
7019defm VPCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128, "vpcmpestri">,
7020                                    VEX;
7021defm VPCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128, "vpcmpestri">,
7022                                    VEX;
7023defm VPCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128, "vpcmpestri">,
7024                                    VEX;
7025}
7026
7027defm PCMPESTRI  : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
7028defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
7029defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
7030defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
7031defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
7032defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
7033
7034//===----------------------------------------------------------------------===//
7035// SSE4.2 - CRC Instructions
7036//===----------------------------------------------------------------------===//
7037
7038// No CRC instructions have AVX equivalents
7039
// CRC32 intrinsic instructions.
// These instructions come only in rr/rm forms; the variants differ only in
// the size of the r and m operands.
7043let Constraints = "$src1 = $dst" in {
7044  def CRC32r32m8  : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
7045                      (ins GR32:$src1, i8mem:$src2),
7046                      "crc32{b} \t{$src2, $src1|$src1, $src2}",
7047                       [(set GR32:$dst,
7048                         (int_x86_sse42_crc32_32_8 GR32:$src1,
7049                         (load addr:$src2)))]>;
7050  def CRC32r32r8  : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
7051                      (ins GR32:$src1, GR8:$src2),
7052                      "crc32{b} \t{$src2, $src1|$src1, $src2}",
7053                       [(set GR32:$dst,
7054                         (int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))]>;
7055  def CRC32r32m16  : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
7056                      (ins GR32:$src1, i16mem:$src2),
7057                      "crc32{w} \t{$src2, $src1|$src1, $src2}",
7058                       [(set GR32:$dst,
7059                         (int_x86_sse42_crc32_32_16 GR32:$src1,
7060                         (load addr:$src2)))]>,
7061                         OpSize;
7062  def CRC32r32r16  : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
7063                      (ins GR32:$src1, GR16:$src2),
7064                      "crc32{w} \t{$src2, $src1|$src1, $src2}",
7065                       [(set GR32:$dst,
7066                         (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))]>,
7067                         OpSize;
7068  def CRC32r32m32  : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
7069                      (ins GR32:$src1, i32mem:$src2),
7070                      "crc32{l} \t{$src2, $src1|$src1, $src2}",
7071                       [(set GR32:$dst,
7072                         (int_x86_sse42_crc32_32_32 GR32:$src1,
7073                         (load addr:$src2)))]>;
7074  def CRC32r32r32  : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
7075                      (ins GR32:$src1, GR32:$src2),
7076                      "crc32{l} \t{$src2, $src1|$src1, $src2}",
7077                       [(set GR32:$dst,
7078                         (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))]>;
7079  def CRC32r64m8  : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
7080                      (ins GR64:$src1, i8mem:$src2),
7081                      "crc32{b} \t{$src2, $src1|$src1, $src2}",
7082                       [(set GR64:$dst,
7083                         (int_x86_sse42_crc32_64_8 GR64:$src1,
7084                         (load addr:$src2)))]>,
7085                         REX_W;
7086  def CRC32r64r8  : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
7087                      (ins GR64:$src1, GR8:$src2),
7088                      "crc32{b} \t{$src2, $src1|$src1, $src2}",
7089                       [(set GR64:$dst,
7090                         (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))]>,
7091                         REX_W;
7092  def CRC32r64m64  : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
7093                      (ins GR64:$src1, i64mem:$src2),
7094                      "crc32{q} \t{$src2, $src1|$src1, $src2}",
7095                       [(set GR64:$dst,
7096                         (int_x86_sse42_crc32_64_64 GR64:$src1,
7097                         (load addr:$src2)))]>,
7098                         REX_W;
7099  def CRC32r64r64  : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
7100                      (ins GR64:$src1, GR64:$src2),
7101                      "crc32{q} \t{$src2, $src1|$src1, $src2}",
7102                       [(set GR64:$dst,
7103                         (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))]>,
7104                         REX_W;
7105}
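
// A sketch of the generated assembly for the 8-bit register form (register
// names purely illustrative):
//   AT&T:  crc32b %dl, %eax
//   Intel: crc32 eax, dl
// where the 32-bit destination accumulates a CRC using the CRC-32C
// (Castagnoli) polynomial, matching the int_x86_sse42_crc32_* intrinsics.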
7106
7107//===----------------------------------------------------------------------===//
7108// AES-NI Instructions
7109//===----------------------------------------------------------------------===//
7110
7111multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
7112                              Intrinsic IntId128, bit Is2Addr = 1> {
7113  def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
7114       (ins VR128:$src1, VR128:$src2),
7115       !if(Is2Addr,
7116           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
7117           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
7118       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
7119       OpSize;
7120  def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
7121       (ins VR128:$src1, i128mem:$src2),
7122       !if(Is2Addr,
7123           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
7124           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
7125       [(set VR128:$dst,
7126         (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize;
7127}
7128
7129// Perform One Round of an AES Encryption/Decryption Flow
7130let Predicates = [HasAVX, HasAES] in {
7131  defm VAESENC          : AESI_binop_rm_int<0xDC, "vaesenc",
7132                         int_x86_aesni_aesenc, 0>, VEX_4V;
7133  defm VAESENCLAST      : AESI_binop_rm_int<0xDD, "vaesenclast",
7134                         int_x86_aesni_aesenclast, 0>, VEX_4V;
7135  defm VAESDEC          : AESI_binop_rm_int<0xDE, "vaesdec",
7136                         int_x86_aesni_aesdec, 0>, VEX_4V;
7137  defm VAESDECLAST      : AESI_binop_rm_int<0xDF, "vaesdeclast",
7138                         int_x86_aesni_aesdeclast, 0>, VEX_4V;
7139}
7140
7141let Constraints = "$src1 = $dst" in {
7142  defm AESENC          : AESI_binop_rm_int<0xDC, "aesenc",
7143                         int_x86_aesni_aesenc>;
7144  defm AESENCLAST      : AESI_binop_rm_int<0xDD, "aesenclast",
7145                         int_x86_aesni_aesenclast>;
7146  defm AESDEC          : AESI_binop_rm_int<0xDE, "aesdec",
7147                         int_x86_aesni_aesdec>;
7148  defm AESDECLAST      : AESI_binop_rm_int<0xDF, "aesdeclast",
7149                         int_x86_aesni_aesdeclast>;
7150}
7151
7152// Perform the AES InvMixColumn Transformation
7153let Predicates = [HasAVX, HasAES] in {
7154  def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
7155      (ins VR128:$src1),
7156      "vaesimc\t{$src1, $dst|$dst, $src1}",
7157      [(set VR128:$dst,
7158        (int_x86_aesni_aesimc VR128:$src1))]>,
7159      OpSize, VEX;
7160  def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
7161      (ins i128mem:$src1),
7162      "vaesimc\t{$src1, $dst|$dst, $src1}",
7163      [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
7164      OpSize, VEX;
7165}
7166def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
7167  (ins VR128:$src1),
7168  "aesimc\t{$src1, $dst|$dst, $src1}",
7169  [(set VR128:$dst,
7170    (int_x86_aesni_aesimc VR128:$src1))]>,
7171  OpSize;
7172def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
7173  (ins i128mem:$src1),
7174  "aesimc\t{$src1, $dst|$dst, $src1}",
7175  [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
7176  OpSize;
7177
7178// AES Round Key Generation Assist
7179let Predicates = [HasAVX, HasAES] in {
7180  def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
7181      (ins VR128:$src1, i8imm:$src2),
7182      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7183      [(set VR128:$dst,
7184        (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
7185      OpSize, VEX;
7186  def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
7187      (ins i128mem:$src1, i8imm:$src2),
7188      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7189      [(set VR128:$dst,
7190        (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
7191      OpSize, VEX;
7192}
7193def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
7194  (ins VR128:$src1, i8imm:$src2),
7195  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7196  [(set VR128:$dst,
7197    (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
7198  OpSize;
7199def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
7200  (ins i128mem:$src1, i8imm:$src2),
7201  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7202  [(set VR128:$dst,
7203    (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
7204  OpSize;
7205
7206//===----------------------------------------------------------------------===//
7207// CLMUL Instructions
7208//===----------------------------------------------------------------------===//
7209
7210// Carry-less Multiplication instructions
7211let neverHasSideEffects = 1 in {
// AVX carry-less multiplication instructions
7213def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
7214           (ins VR128:$src1, VR128:$src2, i8imm:$src3),
7215           "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7216           []>;
7217
7218let mayLoad = 1 in
7219def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
7220           (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
7221           "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7222           []>;
7223
7224let Constraints = "$src1 = $dst" in {
7225def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
7226           (ins VR128:$src1, VR128:$src2, i8imm:$src3),
7227           "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
7228           []>;
7229
7230let mayLoad = 1 in
7231def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
7232           (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
7233           "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
7234           []>;
7235} // Constraints = "$src1 = $dst"
7236} // neverHasSideEffects = 1
7237
7238
7239multiclass pclmul_alias<string asm, int immop> {
7240  def : InstAlias<!strconcat("pclmul", asm,
7241                           "dq {$src, $dst|$dst, $src}"),
7242                  (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>;
7243
7244  def : InstAlias<!strconcat("pclmul", asm,
7245                             "dq {$src, $dst|$dst, $src}"),
7246                  (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>;
7247
7248  def : InstAlias<!strconcat("vpclmul", asm,
7249                             "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
7250                  (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>;
7251
7252  def : InstAlias<!strconcat("vpclmul", asm,
7253                             "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
7254                  (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>;
7255}
7256defm : pclmul_alias<"hqhq", 0x11>;
7257defm : pclmul_alias<"hqlq", 0x01>;
7258defm : pclmul_alias<"lqhq", 0x10>;
7259defm : pclmul_alias<"lqlq", 0x00>;
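
// The alias suffixes encode which quadword of each source participates in the
// carry-less multiply: the first hq/lq pair maps to bit 0 of the immediate and
// the second pair to bit 4, so "lqlq" is imm 0x00 and "hqhq" is imm 0x11.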
7260
7261//===----------------------------------------------------------------------===//
7262// AVX Instructions
7263//===----------------------------------------------------------------------===//
7264
7265//===----------------------------------------------------------------------===//
7266// VBROADCAST - Load from memory and broadcast to all elements of the
7267//              destination operand
7268//
7269class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
7270                    X86MemOperand x86memop, Intrinsic Int> :
7271  AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
7272        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
7273        [(set RC:$dst, (Int addr:$src))]>, VEX;
7274
7275// AVX2 adds register forms
7276class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
7277                         Intrinsic Int> :
7278  AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
7279         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
7280         [(set RC:$dst, (Int VR128:$src))]>, VEX;
7281
7282let ExeDomain = SSEPackedSingle in {
7283  def VBROADCASTSSrm  : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
7284                                      int_x86_avx_vbroadcast_ss>;
7285  def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
7286                                      int_x86_avx_vbroadcast_ss_256>;
7287}
7288let ExeDomain = SSEPackedDouble in
7289def VBROADCASTSDrm  : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
7290                                    int_x86_avx_vbroadcast_sd_256>;
7291def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
7292                                   int_x86_avx_vbroadcastf128_pd_256>;
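
// For orientation: vbroadcastss loads one 32-bit element and replicates it
// into every lane of the destination (four lanes for VR128, eight for VR256),
// vbroadcastsd does the same with a 64-bit element into a VR256, and
// vbroadcastf128 replicates a whole 128-bit block into both halves of a YMM
// register.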
7293
7294let ExeDomain = SSEPackedSingle in {
7295  def VBROADCASTSSrr  : avx2_broadcast_reg<0x18, "vbroadcastss", VR128,
7296                                           int_x86_avx2_vbroadcast_ss_ps>;
7297  def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256,
7298                                           int_x86_avx2_vbroadcast_ss_ps_256>;
7299}
7300let ExeDomain = SSEPackedDouble in
7301def VBROADCASTSDrr  : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
7302                                         int_x86_avx2_vbroadcast_sd_pd_256>;
7303
7304let Predicates = [HasAVX2] in
7305def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
7306                                   int_x86_avx2_vbroadcasti128>;
7307
7308let Predicates = [HasAVX] in
7309def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
7310          (VBROADCASTF128 addr:$src)>;
7311
7312
7313//===----------------------------------------------------------------------===//
7314// VINSERTF128 - Insert packed floating-point values
7315//
7316let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
7317def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
7318          (ins VR256:$src1, VR128:$src2, i8imm:$src3),
7319          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7320          []>, VEX_4V;
7321let mayLoad = 1 in
7322def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
7323          (ins VR256:$src1, f128mem:$src2, i8imm:$src3),
7324          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7325          []>, VEX_4V;
7326}
7327
7328let Predicates = [HasAVX] in {
7329def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
7330                                   (i32 imm)),
7331          (VINSERTF128rr VR256:$src1, VR128:$src2,
7332                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
7333def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
7334                                   (i32 imm)),
7335          (VINSERTF128rr VR256:$src1, VR128:$src2,
7336                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
7337def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
7338                                   (i32 imm)),
7339          (VINSERTF128rr VR256:$src1, VR128:$src2,
7340                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
7341def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
7342                                   (i32 imm)),
7343          (VINSERTF128rr VR256:$src1, VR128:$src2,
7344                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
7345def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
7346                                   (i32 imm)),
7347          (VINSERTF128rr VR256:$src1, VR128:$src2,
7348                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
7349def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
7350                                   (i32 imm)),
7351          (VINSERTF128rr VR256:$src1, VR128:$src2,
7352                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
7353
7354def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
7355                                   (i32 imm)),
7356          (VINSERTF128rm VR256:$src1, addr:$src2,
7357                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
7358def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
7359                                   (i32 imm)),
7360          (VINSERTF128rm VR256:$src1, addr:$src2,
7361                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
7362def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
7363                                   (i32 imm)),
7364          (VINSERTF128rm VR256:$src1, addr:$src2,
7365                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
7366}
7367
7368//===----------------------------------------------------------------------===//
7369// VEXTRACTF128 - Extract packed floating-point values
7370//
7371let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
7372def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
7373          (ins VR256:$src1, i8imm:$src2),
7374          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7375          []>, VEX;
7376let mayStore = 1 in
7377def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
7378          (ins f128mem:$dst, VR256:$src1, i8imm:$src2),
7379          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7380          []>, VEX;
7381}
7382
7383// Extract and store.
7384let Predicates = [HasAVX] in {
7385  def : Pat<(alignedstore (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), addr:$dst),
7386          (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
7387  def : Pat<(alignedstore (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), addr:$dst),
7388          (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
7389  def : Pat<(alignedstore (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), addr:$dst),
7390          (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
7391
7392  def : Pat<(int_x86_sse_storeu_ps addr:$dst, (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2)),
7393          (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
7394  def : Pat<(int_x86_sse2_storeu_pd addr:$dst, (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2)),
7395          (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
7396  def : Pat<(int_x86_sse2_storeu_dq addr:$dst, (bc_v16i8 (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2))),
7397          (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
7398}
7399
7400// AVX1 patterns
7401let Predicates = [HasAVX] in {
7402def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
7403          (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
7404def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
7405          (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
7406def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
7407          (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
7408
7409def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
7410          (v4f32 (VEXTRACTF128rr
7411                    (v8f32 VR256:$src1),
7412                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
7413def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
7414          (v2f64 (VEXTRACTF128rr
7415                    (v4f64 VR256:$src1),
7416                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
7417def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
7418          (v2i64 (VEXTRACTF128rr
7419                    (v4i64 VR256:$src1),
7420                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
7421def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
7422          (v4i32 (VEXTRACTF128rr
7423                    (v8i32 VR256:$src1),
7424                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
7425def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
7426          (v8i16 (VEXTRACTF128rr
7427                    (v16i16 VR256:$src1),
7428                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
7429def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
7430          (v16i8 (VEXTRACTF128rr
7431                    (v32i8 VR256:$src1),
7432                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
7433}
7434
7435//===----------------------------------------------------------------------===//
7436// VMASKMOV - Conditional SIMD Packed Loads and Stores
7437//
7438multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
7439                          Intrinsic IntLd, Intrinsic IntLd256,
7440                          Intrinsic IntSt, Intrinsic IntSt256> {
7441  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
7442             (ins VR128:$src1, f128mem:$src2),
7443             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7444             [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
7445             VEX_4V;
7446  def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
7447             (ins VR256:$src1, f256mem:$src2),
7448             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7449             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
7450             VEX_4V;
7451  def mr  : AVX8I<opc_mr, MRMDestMem, (outs),
7452             (ins f128mem:$dst, VR128:$src1, VR128:$src2),
7453             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7454             [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
7455  def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
7456             (ins f256mem:$dst, VR256:$src1, VR256:$src2),
7457             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7458             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
7459}
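
// Roughly speaking, the masked loads bring in only the elements whose mask
// element has its sign bit set (the remaining lanes are zeroed), while the
// masked stores write back only the selected elements and leave the other
// memory locations untouched.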
7460
7461let ExeDomain = SSEPackedSingle in
7462defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
7463                                 int_x86_avx_maskload_ps,
7464                                 int_x86_avx_maskload_ps_256,
7465                                 int_x86_avx_maskstore_ps,
7466                                 int_x86_avx_maskstore_ps_256>;
7467let ExeDomain = SSEPackedDouble in
7468defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
7469                                 int_x86_avx_maskload_pd,
7470                                 int_x86_avx_maskload_pd_256,
7471                                 int_x86_avx_maskstore_pd,
7472                                 int_x86_avx_maskstore_pd_256>;
7473
7474//===----------------------------------------------------------------------===//
7475// VPERMIL - Permute Single and Double Floating-Point Values
7476//
7477multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
7478                      RegisterClass RC, X86MemOperand x86memop_f,
7479                      X86MemOperand x86memop_i, PatFrag i_frag,
7480                      Intrinsic IntVar, ValueType vt> {
7481  def rr  : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
7482             (ins RC:$src1, RC:$src2),
7483             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7484             [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V;
7485  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
7486             (ins RC:$src1, x86memop_i:$src2),
7487             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7488             [(set RC:$dst, (IntVar RC:$src1,
7489                             (bitconvert (i_frag addr:$src2))))]>, VEX_4V;
7490
7491  def ri  : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
7492             (ins RC:$src1, i8imm:$src2),
7493             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7494             [(set RC:$dst, (vt (X86VPermilp RC:$src1, (i8 imm:$src2))))]>, VEX;
7495  def mi  : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
7496             (ins x86memop_f:$src1, i8imm:$src2),
7497             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7498             [(set RC:$dst,
7499               (vt (X86VPermilp (memop addr:$src1), (i8 imm:$src2))))]>, VEX;
7500}
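
// The rr/rm forms take a per-element control vector in a register or memory
// operand and are matched through the vpermilvar intrinsics, while the ri/mi
// forms take the shuffle control as an 8-bit immediate and are matched from
// the X86VPermilp node.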
7501
7502let ExeDomain = SSEPackedSingle in {
7503  defm VPERMILPS  : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
7504                               memopv2i64, int_x86_avx_vpermilvar_ps, v4f32>;
7505  defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
7506                              memopv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>;
7507}
7508let ExeDomain = SSEPackedDouble in {
7509  defm VPERMILPD  : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
7510                               memopv2i64, int_x86_avx_vpermilvar_pd, v2f64>;
7511  defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
7512                              memopv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>;
7513}
7514
7515let Predicates = [HasAVX] in {
7516def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
7517          (VPERMILPSYri VR256:$src1, imm:$imm)>;
7518def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
7519          (VPERMILPDYri VR256:$src1, imm:$imm)>;
7520def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)),
7521                               (i8 imm:$imm))),
7522          (VPERMILPSYmi addr:$src1, imm:$imm)>;
7523def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))),
7524          (VPERMILPDYmi addr:$src1, imm:$imm)>;
7525
7526def : Pat<(v2i64 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
7527          (VPERMILPDri VR128:$src1, imm:$imm)>;
7528def : Pat<(v2i64 (X86VPermilp (memopv2i64 addr:$src1), (i8 imm:$imm))),
7529          (VPERMILPDmi addr:$src1, imm:$imm)>;
7530}
7531
7532//===----------------------------------------------------------------------===//
7533// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
7534//
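// The 8-bit immediate selects a 128-bit half of either source for each half
// of the destination: bits 1:0 pick the source half for the low 128 bits of
// the result (0/1 = low/high half of $src1, 2/3 = low/high half of $src2),
// bits 5:4 do the same for the high 128 bits, and setting bit 3 or bit 7
// zeroes the corresponding half instead.
//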
let ExeDomain = SSEPackedSingle in {
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR256:$src2, i8imm:$src3),
          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst, (v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2,
                              (i8 imm:$src3))))]>, VEX_4V;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv8f32 addr:$src2),
                             (i8 imm:$src3)))]>, VEX_4V;
}

let Predicates = [HasAVX] in {
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;

def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1,
                  (memopv8f32 addr:$src2), (i8 imm:$imm))),
          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1,
                  (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
                  (memopv4i64 addr:$src2), (i8 imm:$imm))),
          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
                  (memopv4f64 addr:$src2), (i8 imm:$imm))),
          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1,
                  (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
                  (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
}

//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
//
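// vzeroall clears all YMM registers; vzeroupper clears only bits 255:128 of
// each YMM register. Compilers typically emit vzeroupper before calling
// legacy (non-VEX) SSE code to avoid AVX/SSE transition penalties. The C
// intrinsics are _mm256_zeroall() and _mm256_zeroupper().
//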
let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
            YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
  // Zero All YMM registers
  def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
                  [(int_x86_avx_vzeroall)]>, TB, VEX, VEX_L, Requires<[HasAVX]>;

  // Zero the upper 128 bits of all YMM registers
  def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
                     [(int_x86_avx_vzeroupper)]>, TB, VEX, Requires<[HasAVX]>;
}

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
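// vcvtph2ps converts packed half-precision (16-bit) floats to single
// precision; vcvtps2ph converts single precision to half, with the rounding
// control taken from the immediate operand. Both require the F16C feature
// (hence the HasF16C predicate below) and are commonly exposed in C as
// _mm_cvtph_ps/_mm256_cvtph_ps and _mm_cvtps_ph/_mm256_cvtps_ph.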
multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
let Predicates = [HasAVX, HasF16C] in {
  def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}",
             [(set RC:$dst, (Int VR128:$src))]>,
             T8, OpSize, VEX;
  let neverHasSideEffects = 1, mayLoad = 1 in
  def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
}
}

multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
let Predicates = [HasAVX, HasF16C] in {
  def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
               (ins RC:$src1, i32i8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
               TA, OpSize, VEX;
  let neverHasSideEffects = 1, mayStore = 1 in
  def mr : Ii8<0x1D, MRMDestMem, (outs x86memop:$dst),
               (ins RC:$src1, i32i8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               TA, OpSize, VEX;
}
}

defm VCVTPH2PS  : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>;
defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>;

//===----------------------------------------------------------------------===//
// AVX2 Instructions
//===----------------------------------------------------------------------===//

/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate
multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
                 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
                 X86MemOperand x86memop> {
  let isCommutable = 1 in
  def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u32u8imm:$src3),
        !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
        VEX_4V;
  def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
        !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set RC:$dst,
          (IntId RC:$src1,
           (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
        VEX_4V;
}

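// vpblendd blends 32-bit elements from the two sources: bit i of the
// immediate selects element i from the second source, otherwise from the
// first. Commonly exposed in C as _mm_blend_epi32/_mm256_blend_epi32.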
let isCommutable = 0 in {
defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
                                   VR128, memopv2i64, i128mem>;
defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
                                    VR256, memopv4i64, i256mem>;
}

//===----------------------------------------------------------------------===//
// VPBROADCAST - Load from memory and broadcast to all elements of the
//               destination operand
//
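// vpbroadcastb/w/d/q replicate a single byte/word/dword/qword, taken either
// from the low element of an XMM register or from memory, into every element
// of the destination, e.g. _mm_broadcastb_epi8 and _mm256_broadcastd_epi32
// in C.
//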
multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
                          X86MemOperand x86memop, PatFrag ld_frag,
                          Intrinsic Int128, Intrinsic Int256> {
  def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR128:$dst, (Int128 VR128:$src))]>, VEX;
  def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR128:$dst,
                    (Int128 (scalar_to_vector (ld_frag addr:$src))))]>, VEX;
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR256:$dst, (Int256 VR128:$src))]>, VEX;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR256:$dst,
                    (Int256 (scalar_to_vector (ld_frag addr:$src))))]>, VEX;
}

defm VPBROADCASTB  : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
                                    int_x86_avx2_pbroadcastb_128,
                                    int_x86_avx2_pbroadcastb_256>;
defm VPBROADCASTW  : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16,
                                    int_x86_avx2_pbroadcastw_128,
                                    int_x86_avx2_pbroadcastw_256>;
defm VPBROADCASTD  : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
                                    int_x86_avx2_pbroadcastd_128,
                                    int_x86_avx2_pbroadcastd_256>;
defm VPBROADCASTQ  : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
                                    int_x86_avx2_pbroadcastq_128,
                                    int_x86_avx2_pbroadcastq_256>;

let Predicates = [HasAVX2] in {
  def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))),
          (VPBROADCASTBrm addr:$src)>;
  def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))),
          (VPBROADCASTBYrm addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
          (VPBROADCASTWrm addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
          (VPBROADCASTWYrm addr:$src)>;
  def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
          (VPBROADCASTDrm addr:$src)>;
  def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
          (VPBROADCASTDYrm addr:$src)>;
  def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
          (VPBROADCASTQrm addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
          (VPBROADCASTQYrm addr:$src)>;
}

// AVX1 broadcast patterns
let Predicates = [HasAVX] in {
def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
          (VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
          (VBROADCASTSDrm addr:$src)>;
def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
          (VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
          (VBROADCASTSDrm addr:$src)>;

def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
          (VBROADCASTSSrm addr:$src)>;
def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
          (VBROADCASTSSrm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// VPERM - Permute instructions
//
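// vpermd/vpermps permute 32-bit elements across the full 256-bit register
// using per-element indices taken from a second vector; vpermq/vpermpd
// (defined below) permute 64-bit elements using a 2-bit selector per element
// in the immediate. In C these correspond to _mm256_permutevar8x32_epi32/_ps
// and _mm256_permute4x64_epi64/_pd.
//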

multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                     ValueType OpVT> {
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
                   (ins VR256:$src1, VR256:$src2),
                   !strconcat(OpcodeStr,
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst,
                     (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, VEX_4V;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
                   (ins VR256:$src1, i256mem:$src2),
                   !strconcat(OpcodeStr,
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst,
                     (OpVT (X86VPermv VR256:$src1,
                            (bitconvert (mem_frag addr:$src2)))))]>,
                   VEX_4V;
}

defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, v8i32>;
let ExeDomain = SSEPackedSingle in
defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, v8f32>;

multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                         ValueType OpVT> {
  def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
                     (ins VR256:$src1, i8imm:$src2),
                     !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set VR256:$dst,
                       (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>, VEX;
  def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
                     (ins i256mem:$src1, i8imm:$src2),
                     !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set VR256:$dst,
                       (OpVT (X86VPermi (mem_frag addr:$src1),
                              (i8 imm:$src2))))]>, VEX;
}

defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, v4i64>, VEX_W;
let ExeDomain = SSEPackedDouble in
defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W;

//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Integer Values in 128-bit chunks
//
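// This is the AVX2 integer counterpart of vperm2f128 above and uses the same
// immediate encoding: bits 1:0 and 5:4 select a 128-bit half of either source
// for the low and high halves of the result, and bits 3 and 7 zero the
// corresponding half. The C intrinsic is _mm256_permute2x128_si256.
//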
let AddedComplexity = 1 in {
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR256:$src2, i8imm:$src3),
          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
                            (i8 imm:$src3))))]>, VEX_4V;
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2),
                             (i8 imm:$src3)))]>, VEX_4V;
}

let Predicates = [HasAVX2], AddedComplexity = 1 in {
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;

def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)),
                  (i8 imm:$imm))),
          (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
                   (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
          (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)),
                  (i8 imm:$imm))),
          (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
}


//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values
//
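// vinserti128 copies the 128-bit second source into the low or high half of
// the 256-bit destination (bit 0 of the immediate selects the half); the rest
// of the destination is taken from the first source. The C intrinsic is
// _mm256_inserti128_si256.
//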
let neverHasSideEffects = 1 in {
def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR128:$src2, i8imm:$src3),
          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>,
          VEX_4V;
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, i128mem:$src2, i8imm:$src3),
          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, VEX_4V;
}

let Predicates = [HasAVX2], AddedComplexity = 1 in {
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
                                   (i32 imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
                                   (i32 imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
                                   (i32 imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
                                   (i32 imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
}

//===----------------------------------------------------------------------===//
// VEXTRACTI128 - Extract packed integer values
//
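// vextracti128 copies the low or high 128-bit half of the source (selected by
// bit 0 of the immediate) to an XMM register or to memory. The C intrinsic is
// _mm256_extracti128_si256.
//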
def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
          (ins VR256:$src1, i8imm:$src2),
          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128:$dst,
            (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
          VEX;
let neverHasSideEffects = 1, mayStore = 1 in
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
          (ins i128mem:$dst, VR256:$src1, i8imm:$src2),
          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;

let Predicates = [HasAVX2], AddedComplexity = 1 in {
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
          (v2i64 (VEXTRACTI128rr
                    (v4i64 VR256:$src1),
                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
          (v4i32 (VEXTRACTI128rr
                    (v8i32 VR256:$src1),
                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
          (v8i16 (VEXTRACTI128rr
                    (v16i16 VR256:$src1),
                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
          (v16i8 (VEXTRACTI128rr
                    (v32i8 VR256:$src1),
                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
}

//===----------------------------------------------------------------------===//
// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
//
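// vpmaskmovd/q load or store 32/64-bit elements under control of the sign bit
// of the corresponding mask element: masked-off elements read as zero on a
// load and leave memory untouched on a store. In C these are the
// _mm/_mm256 maskload_epi32/epi64 and maskstore_epi32/epi64 intrinsics.
//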
multiclass avx2_pmovmask<string OpcodeStr,
                         Intrinsic IntLd128, Intrinsic IntLd256,
                         Intrinsic IntSt128, Intrinsic IntSt256> {
  def rm  : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, VEX_4V;
  def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, i256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, VEX_4V;
  def mr  : AVX28I<0x8e, MRMDestMem, (outs),
             (ins i128mem:$dst, VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
  def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
             (ins i256mem:$dst, VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
}

defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
                                int_x86_avx2_maskload_d,
                                int_x86_avx2_maskload_d_256,
                                int_x86_avx2_maskstore_d,
                                int_x86_avx2_maskstore_d_256>;
defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
                                int_x86_avx2_maskload_q,
                                int_x86_avx2_maskload_q_256,
                                int_x86_avx2_maskstore_q,
                                int_x86_avx2_maskstore_q_256>, VEX_W;


//===----------------------------------------------------------------------===//
// Variable Bit Shifts
//
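// vpsllv/vpsrlv/vpsrav shift each element by the count held in the
// corresponding element of the second operand. Counts greater than or equal
// to the element width produce zero for the logical shifts and fill with the
// sign bit for vpsravd. The C intrinsics are _mm/_mm256 sllv_epi32/epi64,
// srlv_epi32/epi64 and srav_epi32.
//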
multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType vt128, ValueType vt256> {
  def rr  : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst,
               (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
             VEX_4V;
  def rm  : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst,
               (vt128 (OpNode VR128:$src1,
                       (vt128 (bitconvert (memopv2i64 addr:$src2))))))]>,
             VEX_4V;
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
             (ins VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst,
               (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
             VEX_4V;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, i256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst,
               (vt256 (OpNode VR256:$src1,
                       (vt256 (bitconvert (memopv4i64 addr:$src2))))))]>,
             VEX_4V;
}

defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;