• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the PTX instructions in TableGen format.
11//
12//===----------------------------------------------------------------------===//
13
14include "NVPTXInstrFormats.td"
15
16// A NOP instruction
// It has no outputs, no inputs, an empty assembly string and no selection
// pattern.
17def NOP : NVPTXInst<(outs), (ins), "", []>;
18
19// List of vector specific properties
// Each def is a VecInstTypeEnum record carrying a distinct small integer tag
// (1-7, plus 15 for isVecOther).  VecInstTypeEnum itself is not defined in
// this part of the file (presumably it comes from NVPTXInstrFormats.td).
20def isVecLD      : VecInstTypeEnum<1>;
21def isVecST      : VecInstTypeEnum<2>;
22def isVecBuild   : VecInstTypeEnum<3>;
23def isVecShuffle : VecInstTypeEnum<4>;
24def isVecExtract : VecInstTypeEnum<5>;
25def isVecInsert  : VecInstTypeEnum<6>;
26def isVecDest    : VecInstTypeEnum<7>;
27def isVecOther   : VecInstTypeEnum<15>;
28
29//===----------------------------------------------------------------------===//
30// NVPTX Operand Definitions.
31//===----------------------------------------------------------------------===//
32
// Operand of value type OtherVT.  NOTE(review): judging by the name it is the
// operand type for branch targets; its uses are outside this part of the file.
33def brtarget    : Operand<OtherVT>;
34
35// CVT conversion modes
36// These must match the enum in NVPTX.h
// Base modes occupy the values 0x0-0x8.
37def CvtNONE : PatLeaf<(i32 0x0)>;
38def CvtRNI  : PatLeaf<(i32 0x1)>;
39def CvtRZI  : PatLeaf<(i32 0x2)>;
40def CvtRMI  : PatLeaf<(i32 0x3)>;
41def CvtRPI  : PatLeaf<(i32 0x4)>;
42def CvtRN   : PatLeaf<(i32 0x5)>;
43def CvtRZ   : PatLeaf<(i32 0x6)>;
44def CvtRM   : PatLeaf<(i32 0x7)>;
45def CvtRP   : PatLeaf<(i32 0x8)>;
46
// Each _FTZ leaf is the corresponding base value with bit 0x10 set.
47def CvtNONE_FTZ : PatLeaf<(i32 0x10)>;
48def CvtRNI_FTZ  : PatLeaf<(i32 0x11)>;
49def CvtRZI_FTZ  : PatLeaf<(i32 0x12)>;
50def CvtRMI_FTZ  : PatLeaf<(i32 0x13)>;
51def CvtRPI_FTZ  : PatLeaf<(i32 0x14)>;
52def CvtRN_FTZ   : PatLeaf<(i32 0x15)>;
53def CvtRZ_FTZ   : PatLeaf<(i32 0x16)>;
54def CvtRM_FTZ   : PatLeaf<(i32 0x17)>;
55def CvtRP_FTZ   : PatLeaf<(i32 0x18)>;
56
// Saturation uses bit 0x20; CvtSAT_FTZ is 0x20 | 0x10.
57def CvtSAT      : PatLeaf<(i32 0x20)>;
58def CvtSAT_FTZ  : PatLeaf<(i32 0x30)>;
59
// i32 immediate operand holding one of the mode values above; it is rendered
// by the printer method named "printCvtMode".
60def CvtMode : Operand<i32> {
61  let PrintMethod = "printCvtMode";
62}
63
64// Compare modes
65// These must match the enum in NVPTX.h
// Base comparison codes occupy the values 0-17.
66def CmpEQ   : PatLeaf<(i32 0)>;
67def CmpNE   : PatLeaf<(i32 1)>;
68def CmpLT   : PatLeaf<(i32 2)>;
69def CmpLE   : PatLeaf<(i32 3)>;
70def CmpGT   : PatLeaf<(i32 4)>;
71def CmpGE   : PatLeaf<(i32 5)>;
72def CmpLO   : PatLeaf<(i32 6)>;
73def CmpLS   : PatLeaf<(i32 7)>;
74def CmpHI   : PatLeaf<(i32 8)>;
75def CmpHS   : PatLeaf<(i32 9)>;
76def CmpEQU  : PatLeaf<(i32 10)>;
77def CmpNEU  : PatLeaf<(i32 11)>;
78def CmpLTU  : PatLeaf<(i32 12)>;
79def CmpLEU  : PatLeaf<(i32 13)>;
80def CmpGTU  : PatLeaf<(i32 14)>;
81def CmpGEU  : PatLeaf<(i32 15)>;
82def CmpNUM  : PatLeaf<(i32 16)>;
83def CmpNAN  : PatLeaf<(i32 17)>;
84
// Each _FTZ leaf is the corresponding base code with bit 0x100 set.
85def CmpEQ_FTZ   : PatLeaf<(i32 0x100)>;
86def CmpNE_FTZ   : PatLeaf<(i32 0x101)>;
87def CmpLT_FTZ   : PatLeaf<(i32 0x102)>;
88def CmpLE_FTZ   : PatLeaf<(i32 0x103)>;
89def CmpGT_FTZ   : PatLeaf<(i32 0x104)>;
90def CmpGE_FTZ   : PatLeaf<(i32 0x105)>;
91def CmpLO_FTZ   : PatLeaf<(i32 0x106)>;
92def CmpLS_FTZ   : PatLeaf<(i32 0x107)>;
93def CmpHI_FTZ   : PatLeaf<(i32 0x108)>;
94def CmpHS_FTZ   : PatLeaf<(i32 0x109)>;
95def CmpEQU_FTZ  : PatLeaf<(i32 0x10A)>;
96def CmpNEU_FTZ  : PatLeaf<(i32 0x10B)>;
97def CmpLTU_FTZ  : PatLeaf<(i32 0x10C)>;
98def CmpLEU_FTZ  : PatLeaf<(i32 0x10D)>;
99def CmpGTU_FTZ  : PatLeaf<(i32 0x10E)>;
100def CmpGEU_FTZ  : PatLeaf<(i32 0x10F)>;
101def CmpNUM_FTZ  : PatLeaf<(i32 0x110)>;
102def CmpNAN_FTZ  : PatLeaf<(i32 0x111)>;
103
// i32 immediate operand holding one of the codes above; it is rendered by the
// printer method named "printCmpMode".
104def CmpMode : Operand<i32> {
105  let PrintMethod = "printCmpMode";
106}
107
// Each record below is at once an f32 operand, a pattern leaf matching an f32
// fpimm, and a node transform whose body returns a fixed f32 target constant
// (0.0 resp. 1.0) regardless of the immediate it is applied to.
108def F32ConstZero : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
109    return CurDAG->getTargetConstantFP(0.0, MVT::f32);
110  }]>;
111def F32ConstOne : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
112    return CurDAG->getTargetConstantFP(1.0, MVT::f32);
113  }]>;
114
115//===----------------------------------------------------------------------===//
116// NVPTX Instruction Predicate Definitions
117//===----------------------------------------------------------------------===//
118
119
// Predicates that forward to a query on Subtarget.  The two useAtomRedG*forGen*
// predicates hold when the generic form is unavailable but the global (G) form
// is available.
120def hasAtomRedG32 : Predicate<"Subtarget->hasAtomRedG32()">;
121def hasAtomRedS32 : Predicate<"Subtarget->hasAtomRedS32()">;
122def hasAtomRedGen32 : Predicate<"Subtarget->hasAtomRedGen32()">;
123def useAtomRedG32forGen32 :
124  Predicate<"!Subtarget->hasAtomRedGen32() && Subtarget->hasAtomRedG32()">;
125def hasBrkPt : Predicate<"Subtarget->hasBrkPt()">;
126def hasAtomRedG64 : Predicate<"Subtarget->hasAtomRedG64()">;
127def hasAtomRedS64 : Predicate<"Subtarget->hasAtomRedS64()">;
128def hasAtomRedGen64 : Predicate<"Subtarget->hasAtomRedGen64()">;
129def useAtomRedG64forGen64 :
130  Predicate<"!Subtarget->hasAtomRedGen64() && Subtarget->hasAtomRedG64()">;
131def hasAtomAddF32 : Predicate<"Subtarget->hasAtomAddF32()">;
132def hasVote : Predicate<"Subtarget->hasVote()">;
133def hasDouble : Predicate<"Subtarget->hasDouble()">;
134def reqPTX20 : Predicate<"Subtarget->reqPTX20()">;
135def hasLDG : Predicate<"Subtarget->hasLDG()">;
136def hasLDU : Predicate<"Subtarget->hasLDU()">;
137def hasGenericLdSt : Predicate<"Subtarget->hasGenericLdSt()">;
138
// The predicates from here to noFMA use unqualified names in their condition
// strings.  NOTE(review): presumably these resolve to members of the class the
// generated selector code is compiled into -- confirm.
// doF32FTZ / doNoF32FTZ are exact complements of each other.
139def doF32FTZ : Predicate<"useF32FTZ()">;
140def doNoF32FTZ : Predicate<"!useF32FTZ()">;
141
// Note: the condition is the bare identifier doMulWide, not a call.
142def doMulWide      : Predicate<"doMulWide">;
143
// allowFMA / noFMA are exact complements of each other.
144def allowFMA : Predicate<"allowFMA()">;
145def noFMA : Predicate<"!allowFMA()">;
146
// f32 division level: 0 selects the APPROX predicate, 1 selects FULL.
147def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">;
148def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;
149
// Complementary pair keyed on usePrecSqrtF32().
150def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
151def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;
152
// Complementary pair keyed on Subtarget->hasHWROT32().
153def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">;
154def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;
155
// Predicate whose condition is the constant 1, i.e. it always holds.
156def true : Predicate<"1">;
157
// Holds when the subtarget's PTX version number is at least 31.
158def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;
159
160
161//===----------------------------------------------------------------------===//
162// Some Common Instruction Class Templates
163//===----------------------------------------------------------------------===//
164
// Two-source integer instruction template.
//
// Expands to six instructions: register/register (rr) and register/immediate
// (ri) forms for 64-, 32- and 16-bit registers.  Each one selects
// (OpNode $a, $b) into $dst.
//   OpcStr - mnemonic prefix; the bit width ("64", "32" or "16") is appended
//            directly to it, so it is expected to end just before the width
//            (this file instantiates it with e.g. "add.s").
//   OpNode - the DAG node matched by every variant.
165multiclass I3<string OpcStr, SDNode OpNode> {
166  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
167                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
168                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
169                       Int64Regs:$b))]>;
170  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
171                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
172                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
173  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
174                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
175                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
176                       Int32Regs:$b))]>;
177  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
178                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
179                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
180  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
181                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
182                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
183                       Int16Regs:$b))]>;
184  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
185                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
186                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
187}
188
// 32-bit-only two-source integer template (rr and ri forms).
//
// Unlike I3, the type suffix is fixed: ".s32" is appended to OpcStr, so OpcStr
// is the full mnemonic without a type (this file passes e.g. "add.cc").
//   OpcStr - mnemonic without type suffix.
//   OpNode - the DAG node matched by both variants.
189multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
190   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
191       Int32Regs:$b),
192                      !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
193                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
194                        Int32Regs:$b))]>;
195   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
196                      !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
197                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
198}
199
// Two-source floating-point template used when allowFMA holds.
//
// Expands to rr and ri forms for f64, f32 with ".ftz", and plain f32.  No
// rounding-mode modifier is emitted.  Every variant requires allowFMA; the
// _ftz variants additionally require doF32FTZ.  The plain f32 variants carry
// no FTZ-related predicate, so they are not excluded when doF32FTZ holds.
//   OpcStr - mnemonic without modifiers or type (e.g. "add").
//   OpNode - the DAG node matched by every variant.
200multiclass F3<string OpcStr, SDNode OpNode> {
201   def f64rr : NVPTXInst<(outs Float64Regs:$dst),
202                      (ins Float64Regs:$a, Float64Regs:$b),
203                      !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
204                      [(set Float64Regs:$dst,
205                        (OpNode Float64Regs:$a, Float64Regs:$b))]>,
206                      Requires<[allowFMA]>;
207   def f64ri : NVPTXInst<(outs Float64Regs:$dst),
208                      (ins Float64Regs:$a, f64imm:$b),
209                      !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
210                      [(set Float64Regs:$dst,
211                        (OpNode Float64Regs:$a, fpimm:$b))]>,
212                      Requires<[allowFMA]>;
213   def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
214                      (ins Float32Regs:$a, Float32Regs:$b),
215                      !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
216                      [(set Float32Regs:$dst,
217                        (OpNode Float32Regs:$a, Float32Regs:$b))]>,
218                      Requires<[allowFMA, doF32FTZ]>;
219   def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
220                      (ins Float32Regs:$a, f32imm:$b),
221                      !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
222                      [(set Float32Regs:$dst,
223                        (OpNode Float32Regs:$a, fpimm:$b))]>,
224                      Requires<[allowFMA, doF32FTZ]>;
225   def f32rr : NVPTXInst<(outs Float32Regs:$dst),
226                      (ins Float32Regs:$a, Float32Regs:$b),
227                      !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
228                      [(set Float32Regs:$dst,
229                        (OpNode Float32Regs:$a, Float32Regs:$b))]>,
230                      Requires<[allowFMA]>;
231   def f32ri : NVPTXInst<(outs Float32Regs:$dst),
232                      (ins Float32Regs:$a, f32imm:$b),
233                      !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
234                      [(set Float32Regs:$dst,
235                        (OpNode Float32Regs:$a, fpimm:$b))]>,
236                      Requires<[allowFMA]>;
237}
238
// Two-source floating-point template used when noFMA holds.
//
// Same six variants as F3, but every assembly string carries an explicit
// ".rn" modifier and every variant requires noFMA instead of allowFMA.  The
// _ftz variants additionally require doF32FTZ.
//   OpcStr - mnemonic without modifiers or type (e.g. "add").
//   OpNode - the DAG node matched by every variant.
239multiclass F3_rn<string OpcStr, SDNode OpNode> {
240   def f64rr : NVPTXInst<(outs Float64Regs:$dst),
241                      (ins Float64Regs:$a, Float64Regs:$b),
242                      !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
243                      [(set Float64Regs:$dst,
244                        (OpNode Float64Regs:$a, Float64Regs:$b))]>,
245                      Requires<[noFMA]>;
246   def f64ri : NVPTXInst<(outs Float64Regs:$dst),
247                      (ins Float64Regs:$a, f64imm:$b),
248                      !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
249                      [(set Float64Regs:$dst,
250                        (OpNode Float64Regs:$a, fpimm:$b))]>,
251                      Requires<[noFMA]>;
252   def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
253                      (ins Float32Regs:$a, Float32Regs:$b),
254                      !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
255                      [(set Float32Regs:$dst,
256                        (OpNode Float32Regs:$a, Float32Regs:$b))]>,
257                      Requires<[noFMA, doF32FTZ]>;
258   def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
259                      (ins Float32Regs:$a, f32imm:$b),
260                      !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
261                      [(set Float32Regs:$dst,
262                        (OpNode Float32Regs:$a, fpimm:$b))]>,
263                      Requires<[noFMA, doF32FTZ]>;
264   def f32rr : NVPTXInst<(outs Float32Regs:$dst),
265                      (ins Float32Regs:$a, Float32Regs:$b),
266                      !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
267                      [(set Float32Regs:$dst,
268                        (OpNode Float32Regs:$a, Float32Regs:$b))]>,
269                      Requires<[noFMA]>;
270   def f32ri : NVPTXInst<(outs Float32Regs:$dst),
271                      (ins Float32Regs:$a, f32imm:$b),
272                      !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
273                      [(set Float32Regs:$dst,
274                        (OpNode Float32Regs:$a, fpimm:$b))]>,
275                      Requires<[noFMA]>;
276}
277
// One-source floating-point template.
//
// Expands to three instructions: f64, f32 with ".ftz" (requires doF32FTZ),
// and plain f32 (no predicate).  Each selects (OpNode $a) into $dst.
//   OpcStr - mnemonic including any modifiers but no type (this file passes
//            e.g. "abs" and "sqrt.rn").
//   OpNode - the DAG node matched by every variant.
278multiclass F2<string OpcStr, SDNode OpNode> {
279   def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
280                      !strconcat(OpcStr, ".f64 \t$dst, $a;"),
281                      [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
282   def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
283                      !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
284                      [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
285                      Requires<[doF32FTZ]>;
286   def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
287                      !strconcat(OpcStr, ".f32 \t$dst, $a;"),
288                      [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
289}
290
291//===----------------------------------------------------------------------===//
292// NVPTX Instructions.
293//===----------------------------------------------------------------------===//
294
295//-----------------------------------
296// General Type Conversion
297//-----------------------------------
298
// Everything up to the matching closing brace is defined with
// hasSideEffects = 0.  None of these cvt instructions has a selection pattern
// (all pattern lists are empty), so they are only created explicitly.
299let hasSideEffects = 0 in {
300// Generate a cvt to the given type from all possible types.
301// Each instance takes a CvtMode immediate that defines the conversion mode to
302// use.  It can be CvtNONE to omit a conversion mode.
//
//   FromName - despite its name, this is the type suffix printed FIRST in the
//              assembly string and it pairs with the result register class;
//              the suffix in each def name (_s16, _u16, ...) is the type of
//              the source operand and is printed second.
//   RC       - register class of the result $dst.
//
// The $mode operand is printed three times with the modifiers "base", "ftz"
// and "sat" (handled by the CvtMode print method).  Note that f16 values are
// carried in Int16Regs here.
303multiclass CVT_FROM_ALL<string FromName, RegisterClass RC> {
304  def _s16 : NVPTXInst<(outs RC:$dst),
305                       (ins Int16Regs:$src, CvtMode:$mode),
306                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
307                       FromName, ".s16\t$dst, $src;"),
308                       []>;
309  def _u16 : NVPTXInst<(outs RC:$dst),
310                       (ins Int16Regs:$src, CvtMode:$mode),
311                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
312                       FromName, ".u16\t$dst, $src;"),
313                       []>;
314  def _f16 : NVPTXInst<(outs RC:$dst),
315                       (ins Int16Regs:$src, CvtMode:$mode),
316                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
317                       FromName, ".f16\t$dst, $src;"),
318                       []>;
319  def _s32 : NVPTXInst<(outs RC:$dst),
320                       (ins Int32Regs:$src, CvtMode:$mode),
321                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
322                       FromName, ".s32\t$dst, $src;"),
323                       []>;
324  def _u32 : NVPTXInst<(outs RC:$dst),
325                       (ins Int32Regs:$src, CvtMode:$mode),
326                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
327                       FromName, ".u32\t$dst, $src;"),
328                       []>;
329  def _s64 : NVPTXInst<(outs RC:$dst),
330                       (ins Int64Regs:$src, CvtMode:$mode),
331                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
332                       FromName, ".s64\t$dst, $src;"),
333                       []>;
334  def _u64 : NVPTXInst<(outs RC:$dst),
335                       (ins Int64Regs:$src, CvtMode:$mode),
336                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
337                       FromName, ".u64\t$dst, $src;"),
338                       []>;
339  def _f32 : NVPTXInst<(outs RC:$dst),
340                       (ins Float32Regs:$src, CvtMode:$mode),
341                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
342                       FromName, ".f32\t$dst, $src;"),
343                       []>;
344  def _f64 : NVPTXInst<(outs RC:$dst),
345                       (ins Float64Regs:$src, CvtMode:$mode),
346                       !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
347                       FromName, ".f64\t$dst, $src;"),
348                       []>;
349}
350
351// Generate a cvt to all possible types.
// Resulting record names have the form CVT_<result type>_<source type>,
// e.g. CVT_s32_f32 prints "cvt<mode>.s32.f32".
352defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
353defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
354defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
355defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>;
356defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>;
357defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>;
358defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
359defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
360defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;
361
362// This set of cvt is different from the above. The type of the source
363// and target are the same.
364//
// That is, source and result use the same register class; the second type in
// the mnemonic names a narrower signed type than the first.  These take no
// CvtMode operand.
365def CVT_INREG_s16_s8 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
366                        "cvt.s16.s8 \t$dst, $src;", []>;
367def CVT_INREG_s32_s8 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
368                        "cvt.s32.s8 \t$dst, $src;", []>;
369def CVT_INREG_s32_s16 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
370                        "cvt.s32.s16 \t$dst, $src;", []>;
371def CVT_INREG_s64_s8 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
372                        "cvt.s64.s8 \t$dst, $src;", []>;
373def CVT_INREG_s64_s16 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
374                        "cvt.s64.s16 \t$dst, $src;", []>;
375def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
376                        "cvt.s64.s32 \t$dst, $src;", []>;
377}
378
379//-----------------------------------
380// Integer Arithmetic
381//-----------------------------------
382
// 1-bit add/sub template (rr and ri forms on Int1Regs).
//
// Whatever OpNode is supplied, the emitted instruction is "xor.pred": on a
// single bit, addition and subtraction both reduce to exclusive-or.
//   OpNode - the DAG node to match (instantiated below with add and sub).
383multiclass ADD_SUB_i1<SDNode OpNode> {
384   def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
385          "xor.pred \t$dst, $a, $b;",
386      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
387   def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
388          "xor.pred \t$dst, $a, $b;",
389      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
390}
391
392defm ADD_i1 : ADD_SUB_i1<add>;
393defm SUB_i1 : ADD_SUB_i1<sub>;
394
395
// 16/32/64-bit add and sub; I3 appends the width, giving e.g. "add.s32".
396defm ADD : I3<"add.s", add>;
397defm SUB : I3<"sub.s", sub>;
398
// 32-bit forms selected for the addc/subc nodes ("add.cc.s32", "sub.cc.s32").
399defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
400defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
401
// 32-bit forms selected for the adde/sube nodes ("addc.cc.s32",
// "subc.cc.s32").
402defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
403defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
404
405//mul.wide PTX instruction
// Immediate leaves used by the mul.wide patterns further down.  Each one
// accepts an integer immediate only if its value fits the stated width.

// Immediate representable as a signed 32-bit integer.
406def SInt32Const : PatLeaf<(imm), [{
407  const APInt &v = N->getAPIntValue();
408  if (v.isSignedIntN(32))
409    return true;
410  return false;
411}]>;
412
// Immediate representable as an unsigned 32-bit integer.
413def UInt32Const : PatLeaf<(imm), [{
414  const APInt &v = N->getAPIntValue();
415  if (v.isIntN(32))
416    return true;
417  return false;
418}]>;
419
// Immediate representable as a signed 16-bit integer.
420def SInt16Const : PatLeaf<(imm), [{
421  const APInt &v = N->getAPIntValue();
422  if (v.isSignedIntN(16))
423    return true;
424  return false;
425}]>;
426
// Immediate representable as an unsigned 16-bit integer.
427def UInt16Const : PatLeaf<(imm), [{
428  const APInt &v = N->getAPIntValue();
429  if (v.isIntN(16))
430    return true;
431  return false;
432}]>;
433
// Matches an immediate shift amount v with 0 <= v <= 30.
//
// This leaf guards the (shl (sext/zext i32), v) -> mul.wide.{s,u}32 patterns,
// whose multiplier is built by SHL2MUL32 as the 32-bit value (1 << v).  For
// v == 31 that value is 0x80000000, which is negative when read as a signed
// 32-bit immediate, so mul.wide.s32 would compute a * -2^31 instead of
// a << 31.  Excluding 31 keeps the multiplier positive; such a shift is then
// simply not turned into a mul.wide.
434def Int5Const : PatLeaf<(imm), [{
435  const APInt &v = N->getAPIntValue();
436  // Check if 0 <= v < 31
437  // Only then is (1 << v) a non-negative signed 32-bit multiplier
438  if (v.sge(0) && v.slt(31))
439    return true;
440  return false;
441}]>;
442
// Matches an immediate shift amount v with 0 <= v <= 14.
//
// This leaf guards the (shl (sext/zext i16), v) -> mul.wide.{s,u}16 patterns,
// whose multiplier is built by SHL2MUL16 as the 16-bit value (1 << v).  For
// v == 15 that value is 0x8000, which is negative when read as a signed
// 16-bit immediate, so mul.wide.s16 would compute a * -2^15 instead of
// a << 15.  Excluding 15 keeps the multiplier positive; such a shift is then
// simply not turned into a mul.wide.
443def Int4Const : PatLeaf<(imm), [{
444  const APInt &v = N->getAPIntValue();
445  // Check if 0 <= v < 15
446  // Only then is (1 << v) a non-negative signed 16-bit multiplier
447  if (v.sge(0) && v.slt(15))
448    return true;
449  return false;
450}]>;
451
// Node transforms that turn a shift-amount immediate v into the equivalent
// multiplier (1 << v), so that a left shift can be emitted as a multiply.

// 32-bit version: the result is an i32 target constant.
452def SHL2MUL32 : SDNodeXForm<imm, [{
453  const APInt &v = N->getAPIntValue();
454  APInt temp(32, 1);
455  return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i32);
456}]>;
457
// 16-bit version: the result is an i16 target constant.
458def SHL2MUL16 : SDNodeXForm<imm, [{
459  const APInt &v = N->getAPIntValue();
460  APInt temp(16, 1);
461  return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i16);
462}]>;
463
// mul.wide instruction definitions.  None has an inline selection pattern;
// they are selected through the separate Pat records in this file.
//
// Naming: MULWIDE<S|U><result bits>[Imm[<imm bits>]]
//   S / U        - signed / unsigned mnemonic (.s32/.s16 vs .u32/.u16)
//   result bits  - 64: 32-bit sources, 64-bit result
//                  32: 16-bit sources, 32-bit result
//   Imm          - second source is an immediate of the source width
//   Imm64/Imm32  - second source is an immediate operand of the RESULT width

// Signed 32 x 32 -> 64.
464def MULWIDES64
465  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
466              "mul.wide.s32 \t$dst, $a, $b;", []>;
467def MULWIDES64Imm
468  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
469                           "mul.wide.s32 \t$dst, $a, $b;", []>;
470def MULWIDES64Imm64
471  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
472                           "mul.wide.s32 \t$dst, $a, $b;", []>;
473
// Unsigned 32 x 32 -> 64.
474def MULWIDEU64
475  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
476              "mul.wide.u32 \t$dst, $a, $b;", []>;
477def MULWIDEU64Imm
478  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
479                           "mul.wide.u32 \t$dst, $a, $b;", []>;
480def MULWIDEU64Imm64
481  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
482                           "mul.wide.u32 \t$dst, $a, $b;", []>;
483
// Signed 16 x 16 -> 32.
484def MULWIDES32
485  : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
486                           "mul.wide.s16 \t$dst, $a, $b;", []>;
487def MULWIDES32Imm
488  : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
489              "mul.wide.s16 \t$dst, $a, $b;", []>;
490def MULWIDES32Imm32
491  : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
492                           "mul.wide.s16 \t$dst, $a, $b;", []>;
493
// Unsigned 16 x 16 -> 32.
494def MULWIDEU32
495  : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
496              "mul.wide.u16 \t$dst, $a, $b;", []>;
497def MULWIDEU32Imm
498  : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
499                           "mul.wide.u16 \t$dst, $a, $b;", []>;
500def MULWIDEU32Imm32
501  : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
502                            "mul.wide.u16 \t$dst, $a, $b;", []>;
503
// Selection patterns that fold an extend followed by shl/mul into a single
// mul.wide.  All of them are active only under the doMulWide predicate.

// Left shift of an extended 32-bit value by a constant accepted by Int5Const:
// emitted as mul.wide by the multiplier that SHL2MUL32 derives from the shift
// amount.
504def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
505          (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
506          Requires<[doMulWide]>;
507def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
508          (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
509          Requires<[doMulWide]>;
510
// Same for an extended 16-bit value, using Int4Const and SHL2MUL16.
511def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
512          (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
513          Requires<[doMulWide]>;
514def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
515          (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
516          Requires<[doMulWide]>;
517
// 64-bit multiply of two sign-extended 32-bit values, or of one such value
// and a 64-bit immediate that fits in a signed 32-bit integer.
518def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
519          (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
520          Requires<[doMulWide]>;
521def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
522          (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>,
523          Requires<[doMulWide]>;
524
// Unsigned counterpart (zero-extended operands / unsigned 32-bit immediate).
525def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
526          (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
527      Requires<[doMulWide]>;
528def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
529          (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>,
530          Requires<[doMulWide]>;
531
// 32-bit multiply of two sign-extended 16-bit values, or of one such value
// and a 32-bit immediate that fits in a signed 16-bit integer.
532def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
533          (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
534      Requires<[doMulWide]>;
535def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
536          (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>,
537          Requires<[doMulWide]>;
538
// Unsigned counterpart (zero-extended operands / unsigned 16-bit immediate).
539def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
540          (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
541      Requires<[doMulWide]>;
542def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
543          (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>,
544          Requires<[doMulWide]>;
545
546
// Type profile for the target-specific widening multiply nodes: one result
// and two operands, with the two operands constrained to the same type.  The
// result type is left unconstrained by this profile.
547def SDTMulWide
548  : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
// TableGen names for the NVPTXISD::MUL_WIDE_SIGNED / MUL_WIDE_UNSIGNED nodes.
549def mul_wide_signed
550  : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;
551def mul_wide_unsigned
552  : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;
553
// Direct selection of the mul_wide_signed / mul_wide_unsigned nodes, all
// guarded by doMulWide.

// 16 x 16 -> 32: register/register and register/immediate forms.
554def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)),
555          (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
556      Requires<[doMulWide]>;
557def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)),
558          (MULWIDES32Imm Int16Regs:$a, imm:$b)>,
559          Requires<[doMulWide]>;
560def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)),
561          (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
562          Requires<[doMulWide]>;
563def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),
564          (MULWIDEU32Imm Int16Regs:$a, imm:$b)>,
565          Requires<[doMulWide]>;
566
567
// 32 x 32 -> 64: register/register and register/immediate forms.
568def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)),
569          (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
570          Requires<[doMulWide]>;
571def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)),
572          (MULWIDES64Imm Int32Regs:$a, imm:$b)>,
573          Requires<[doMulWide]>;
574def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)),
575          (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
576          Requires<[doMulWide]>;
577def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)),
578          (MULWIDEU64Imm Int32Regs:$a, imm:$b)>,
579          Requires<[doMulWide]>;
580
// Same-width integer multiply, high-half multiply, divide and remainder.
// Each defm yields rr/ri forms for 16, 32 and 64 bits via I3, which appends
// the width to the given prefix (e.g. "mul.lo.s32").
581defm MULT : I3<"mul.lo.s", mul>;
582
583defm MULTHS : I3<"mul.hi.s", mulhs>;
584defm MULTHU : I3<"mul.hi.u", mulhu>;
585
586defm SDIV : I3<"div.s", sdiv>;
587defm UDIV : I3<"div.u", udiv>;
588
589defm SREM : I3<"rem.s", srem>;
590// The ri version will not be selected as DAGCombiner::visitSREM will lower it.
591defm UREM : I3<"rem.u", urem>;
592// The ri version will not be selected as DAGCombiner::visitUREM will lower it.
593
// Type profile for integer multiply-add: one result and three operands, all
// constrained to the same integer type.
594def SDTIMAD
595  : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>,
596                         SDTCisInt<2>, SDTCisSameAs<0, 2>,
597                         SDTCisSameAs<0, 3>]>;
// TableGen name for the NVPTXISD::IMAD node.
598def imad
599  : SDNode<"NVPTXISD::IMAD", SDTIMAD>;
600
// Integer multiply-add instructions ("mad.lo.sNN $dst, $a, $b, $c") selected
// for the imad node.
//
// Naming: MAD<bits><a><b><c> where each letter is r (register) or i
// (immediate) for operands $a, $b, $c.  $a is always a register; the four
// variants per width cover every register/immediate combination of $b and $c.

// 16-bit forms.
601def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
602                      (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
603                      "mad.lo.s16 \t$dst, $a, $b, $c;",
604                      [(set Int16Regs:$dst,
605                         (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>;
606def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
607                      (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
608                      "mad.lo.s16 \t$dst, $a, $b, $c;",
609                      [(set Int16Regs:$dst,
610                         (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>;
611def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
612                      (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
613                      "mad.lo.s16 \t$dst, $a, $b, $c;",
614                      [(set Int16Regs:$dst,
615                        (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>;
616def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
617    (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
618                      "mad.lo.s16 \t$dst, $a, $b, $c;",
619                      [(set Int16Regs:$dst,
620                        (imad Int16Regs:$a, imm:$b, imm:$c))]>;
621
// 32-bit forms.
622def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
623                      (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
624                      "mad.lo.s32 \t$dst, $a, $b, $c;",
625                      [(set Int32Regs:$dst,
626                        (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>;
627def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
628                      (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
629                      "mad.lo.s32 \t$dst, $a, $b, $c;",
630                      [(set Int32Regs:$dst,
631                        (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>;
632def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
633                      (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
634                      "mad.lo.s32 \t$dst, $a, $b, $c;",
635                      [(set Int32Regs:$dst,
636                        (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>;
637def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
638                      (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
639                      "mad.lo.s32 \t$dst, $a, $b, $c;",
640                      [(set Int32Regs:$dst,
641                        (imad Int32Regs:$a, imm:$b, imm:$c))]>;
642
// 64-bit forms.
643def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
644                      (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
645                      "mad.lo.s64 \t$dst, $a, $b, $c;",
646                      [(set Int64Regs:$dst,
647                        (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>;
648def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
649                      (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
650                      "mad.lo.s64 \t$dst, $a, $b, $c;",
651                      [(set Int64Regs:$dst,
652                        (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>;
653def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
654                      (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
655                      "mad.lo.s64 \t$dst, $a, $b, $c;",
656                      [(set Int64Regs:$dst,
657                        (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>;
658def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
659                      (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
660                      "mad.lo.s64 \t$dst, $a, $b, $c;",
661                      [(set Int64Regs:$dst,
662                        (imad Int64Regs:$a, imm:$b, imm:$c))]>;
663
// Integer negation ("neg.sNN") for 16-, 32- and 64-bit registers, selected
// for the ineg pattern fragment.
664def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
665                     "neg.s16 \t$dst, $src;",
666         [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
667def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
668                     "neg.s32 \t$dst, $src;",
669         [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
670def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
671                     "neg.s64 \t$dst, $src;",
672         [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
673
674//-----------------------------------
675// Floating Point Arithmetic
676//-----------------------------------
677
678// Constant 1.0f
// Matches an fpimm only if it uses IEEE single semantics and equals 1.0f.
// The semantics check comes first so convertToFloat() is only called on a
// single-precision value.
679def FloatConst1 : PatLeaf<(fpimm), [{
680    if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
681      return false;
682    float f = (float)N->getValueAPF().convertToFloat();
683    return (f==1.0f);
684}]>;
685// Constant (double)1.0
// Matches an fpimm only if it uses IEEE double semantics and equals 1.0.
686def DoubleConst1 : PatLeaf<(fpimm), [{
687    if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
688      return false;
689    double d = (double)N->getValueAPF().convertToDouble();
690    return (d==1.0);
691}]>;
692
// fadd/fsub/fmul without a rounding modifier; F3 gates every variant on
// allowFMA.
693defm FADD : F3<"add", fadd>;
694defm FSUB : F3<"sub", fsub>;
695defm FMUL : F3<"mul", fmul>;
696
// The same three operations with an explicit ".rn" modifier; F3_rn gates
// every variant on noFMA, so exactly one of the two families is enabled.
697defm FADD_rn : F3_rn<"add", fadd>;
698defm FSUB_rn : F3_rn<"sub", fsub>;
699defm FMUL_rn : F3_rn<"mul", fmul>;
700
// Unary operations.  FSQRT passes "sqrt.rn" as the mnemonic, so its variants
// print "sqrt.rn.f64", "sqrt.rn.ftz.f32" and "sqrt.rn.f32".
701defm FABS : F2<"abs", fabs>;
702defm FNEG : F2<"neg", fneg>;
703defm FSQRT : F2<"sqrt.rn", fsqrt>;
704
705//
706// F64 division
707//
// None of the three f64 forms carries a predicate.

// 1.0 / $b: matched through DoubleConst1 and emitted as a reciprocal; the
// immediate $a is an operand of the instruction but is not printed.
708def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
709                      (ins f64imm:$a, Float64Regs:$b),
710                      "rcp.rn.f64 \t$dst, $b;",
711                      [(set Float64Regs:$dst,
712                        (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
// General register/register division.
713def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
714                      (ins Float64Regs:$a, Float64Regs:$b),
715                      "div.rn.f64 \t$dst, $a, $b;",
716                      [(set Float64Regs:$dst,
717                        (fdiv Float64Regs:$a, Float64Regs:$b))]>;
// Division of a register by an immediate.
718def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
719                      (ins Float64Regs:$a, f64imm:$b),
720                      "div.rn.f64 \t$dst, $a, $b;",
721                      [(set Float64Regs:$dst,
722                        (fdiv Float64Regs:$a, fpimm:$b))]>;
723
//
// F32 Approximate reciprocal
//
// Both forms match (fdiv 1.0f, b) under do_DIVF32_APPROX. The FTZ form
// additionally requires doF32FTZ and is listed first.
//
def FDIV321r_ftz
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins f32imm:$a, Float32Regs:$b),
              "rcp.approx.ftz.f32 \t$dst, $b;",
              [(set Float32Regs:$dst,
                    (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    Requires<[do_DIVF32_APPROX, doF32FTZ]>;

def FDIV321r
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins f32imm:$a, Float32Regs:$b),
              "rcp.approx.f32 \t$dst, $b;",
              [(set Float32Regs:$dst,
                    (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    Requires<[do_DIVF32_APPROX]>;
//
// F32 Approximate division
//
// Register/register and register/immediate forms, each in an FTZ and a
// non-FTZ flavour. All of them require do_DIVF32_APPROX; the FTZ flavours
// also require doF32FTZ and are listed first.
//
def FDIV32approxrr_ftz
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b),
              "div.approx.ftz.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                    (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[do_DIVF32_APPROX, doF32FTZ]>;

def FDIV32approxri_ftz
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b),
              "div.approx.ftz.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                    (fdiv Float32Regs:$a, fpimm:$b))]>,
    Requires<[do_DIVF32_APPROX, doF32FTZ]>;

def FDIV32approxrr
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b),
              "div.approx.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                    (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[do_DIVF32_APPROX]>;

def FDIV32approxri
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b),
              "div.approx.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                    (fdiv Float32Regs:$a, fpimm:$b))]>,
    Requires<[do_DIVF32_APPROX]>;
//
// F32 Semi-accurate reciprocal
//
// rcp.approx gives the same result as div.full(1.0f, a) and is faster, so
// under do_DIVF32_FULL a division of 1.0f by a register is still emitted as
// rcp.approx.
//
def FDIV321r_approx_ftz
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins f32imm:$a, Float32Regs:$b),
              "rcp.approx.ftz.f32 \t$dst, $b;",
              [(set Float32Regs:$dst,
                    (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    Requires<[do_DIVF32_FULL, doF32FTZ]>;

def FDIV321r_approx
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins f32imm:$a, Float32Regs:$b),
              "rcp.approx.f32 \t$dst, $b;",
              [(set Float32Regs:$dst,
                    (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    Requires<[do_DIVF32_FULL]>;
//
// F32 Semi-accurate division
//
// div.full forms, all requiring do_DIVF32_FULL. The FTZ flavours also
// require doF32FTZ and are listed first.
//
def FDIV32rr_ftz
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b),
              "div.full.ftz.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                    (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[do_DIVF32_FULL, doF32FTZ]>;

def FDIV32ri_ftz
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b),
              "div.full.ftz.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                    (fdiv Float32Regs:$a, fpimm:$b))]>,
    Requires<[do_DIVF32_FULL, doF32FTZ]>;

def FDIV32rr
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b),
              "div.full.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                    (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[do_DIVF32_FULL]>;

def FDIV32ri
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b),
              "div.full.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                    (fdiv Float32Regs:$a, fpimm:$b))]>,
    Requires<[do_DIVF32_FULL]>;
//
// F32 Accurate reciprocal
//
// rcp.rn forms for (fdiv 1.0f, b). Both require reqPTX20; the FTZ flavour
// also requires doF32FTZ and is listed first.
//
def FDIV321r_prec_ftz
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins f32imm:$a, Float32Regs:$b),
              "rcp.rn.ftz.f32 \t$dst, $b;",
              [(set Float32Regs:$dst,
                    (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    Requires<[reqPTX20, doF32FTZ]>;

def FDIV321r_prec
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins f32imm:$a, Float32Regs:$b),
              "rcp.rn.f32 \t$dst, $b;",
              [(set Float32Regs:$dst,
                    (fdiv FloatConst1:$a, Float32Regs:$b))]>,
    Requires<[reqPTX20]>;
//
// F32 Accurate division
//
// div.rn forms, all requiring reqPTX20. The FTZ flavours also require
// doF32FTZ and are listed first.
//
def FDIV32rr_prec_ftz
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b),
              "div.rn.ftz.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                    (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[doF32FTZ, reqPTX20]>;

def FDIV32ri_prec_ftz
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b),
              "div.rn.ftz.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                    (fdiv Float32Regs:$a, fpimm:$b))]>,
    Requires<[doF32FTZ, reqPTX20]>;

def FDIV32rr_prec
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b),
              "div.rn.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                    (fdiv Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[reqPTX20]>;

def FDIV32ri_prec
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b),
              "div.rn.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst,
                    (fdiv Float32Regs:$a, fpimm:$b))]>,
    Requires<[reqPTX20]>;
852
//
// F32 rsqrt
//

// The instruction carries no selection pattern of its own; the anonymous
// pattern that follows maps onto it.
def RSQRTF32approx1r
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$b),
              "rsqrt.approx.f32 \t$dst, $b;",
              []>;

// 1.0f / int_nvvm_sqrt_f(b)  ==>  rsqrt.approx.f32 b
let Predicates = [do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ] in
def : Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)),
          (RSQRTF32approx1r Float32Regs:$b)>;
863
// f32 fused multiply-add: dst = fma(a, b, c).
//
//   OpcStr - opcode text placed in front of the operand list.
//   Pred   - predicate guarding every variant.
//
// Variants are named after their operand kinds (r = register, i = immediate)
// for $a, $b and $c; $a is always a register.
multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
  let Predicates = [Pred] in {
    def rrr
      : NVPTXInst<(outs Float32Regs:$dst),
                  (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
                  !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                  [(set Float32Regs:$dst,
                        (fma Float32Regs:$a, Float32Regs:$b,
                             Float32Regs:$c))]>;
    def rri
      : NVPTXInst<(outs Float32Regs:$dst),
                  (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
                  !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                  [(set Float32Regs:$dst,
                        (fma Float32Regs:$a, Float32Regs:$b, fpimm:$c))]>;
    def rir
      : NVPTXInst<(outs Float32Regs:$dst),
                  (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
                  !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                  [(set Float32Regs:$dst,
                        (fma Float32Regs:$a, fpimm:$b, Float32Regs:$c))]>;
    def rii
      : NVPTXInst<(outs Float32Regs:$dst),
                  (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
                  !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                  [(set Float32Regs:$dst,
                        (fma Float32Regs:$a, fpimm:$b, fpimm:$c))]>;
  }
}
890
// f64 fused multiply-add: dst = fma(a, b, c).
//
//   OpcStr - opcode text placed in front of the operand list.
//   Pred   - predicate guarding every variant.
//
// Variants are named after their operand kinds (r = register, i = immediate)
// for $a, $b and $c; $a is always a register.
multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
  let Predicates = [Pred] in {
    def rrr
      : NVPTXInst<(outs Float64Regs:$dst),
                  (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
                  !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                  [(set Float64Regs:$dst,
                        (fma Float64Regs:$a, Float64Regs:$b,
                             Float64Regs:$c))]>;
    def rri
      : NVPTXInst<(outs Float64Regs:$dst),
                  (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
                  !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                  [(set Float64Regs:$dst,
                        (fma Float64Regs:$a, Float64Regs:$b, fpimm:$c))]>;
    def rir
      : NVPTXInst<(outs Float64Regs:$dst),
                  (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
                  !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                  [(set Float64Regs:$dst,
                        (fma Float64Regs:$a, fpimm:$b, Float64Regs:$c))]>;
    def rii
      : NVPTXInst<(outs Float64Regs:$dst),
                  (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
                  !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                  [(set Float64Regs:$dst,
                        (fma Float64Regs:$a, fpimm:$b, fpimm:$c))]>;
  }
}
917
// FMA instantiations. The FTZ f32 flavour is guarded by doF32FTZ and is
// listed before the plain f32 one; the remaining two are guarded by `true`.
defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doF32FTZ>;
defm FMA32     : FPCONTRACT32<"fma.rn.f32", true>;
defm FMA64     : FPCONTRACT64<"fma.rn.f64", true>;
921
// f32 sine and cosine, emitted as the approximate PTX instructions.
def SINF
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$src),
              "sin.approx.f32 \t$dst, $src;",
              [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;

def COSF
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$src),
              "cos.approx.f32 \t$dst, $src;",
              [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
928
// Lower (frem x, y) into (sub x, (mul (ftrunc (div x, y)) y)),
// i.e. "poor man's fmod()".
//
// The quotient has to be rounded toward zero (CvtRZI), not toward negative
// infinity (CvtRMI): the result of frem takes the sign of the dividend.
// For example frem(-5.0, 3.0) must be -2.0:
//   trunc(-5/3) = -1  ->  -5 - (-1 * 3) = -2   (correct)
//   floor(-5/3) = -2  ->  -5 - (-2 * 3) =  1   (wrong)
//
// NOTE(review): this expansion still computes the quotient with a rounded
// division, so it is not exact for every input.

// frem - f32 FTZ
def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
          (FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32
            (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRZI_FTZ),
             Float32Regs:$y))>,
          Requires<[doF32FTZ]>;
def : Pat<(frem Float32Regs:$x, fpimm:$y),
          (FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32
            (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRZI_FTZ),
             fpimm:$y))>,
          Requires<[doF32FTZ]>;

// frem - f32
def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
          (FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32
            (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRZI),
             Float32Regs:$y))>;
def : Pat<(frem Float32Regs:$x, fpimm:$y),
          (FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32
            (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRZI),
             fpimm:$y))>;

// frem - f64
def : Pat<(frem Float64Regs:$x, Float64Regs:$y),
          (FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64
            (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRZI),
             Float64Regs:$y))>;
def : Pat<(frem Float64Regs:$x, fpimm:$y),
          (FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64
            (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRZI),
             fpimm:$y))>;
963
964//-----------------------------------
965// Logical Arithmetic
966//-----------------------------------
967
// Bitwise logical operations.
//
//   OpcStr - PTX opcode without the type suffix ("or", "and", "xor").
//   OpNode - the DAG node selected by every variant.
//
// For each of the pred/b16/b32/b64 types there is a register-register (rr)
// and a register-immediate (ri) variant.
multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
  // 1-bit predicate
  def b1rr
    : NVPTXInst<(outs Int1Regs:$dst),
                (ins Int1Regs:$a, Int1Regs:$b),
                !strconcat(OpcStr, ".pred  \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
  def b1ri
    : NVPTXInst<(outs Int1Regs:$dst),
                (ins Int1Regs:$a, i1imm:$b),
                !strconcat(OpcStr, ".pred  \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;

  // 16-bit
  def b16rr
    : NVPTXInst<(outs Int16Regs:$dst),
                (ins Int16Regs:$a, Int16Regs:$b),
                !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
                [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
  def b16ri
    : NVPTXInst<(outs Int16Regs:$dst),
                (ins Int16Regs:$a, i16imm:$b),
                !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
                [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;

  // 32-bit
  def b32rr
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$a, Int32Regs:$b),
                !strconcat(OpcStr, ".b32  \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def b32ri
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$a, i32imm:$b),
                !strconcat(OpcStr, ".b32  \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;

  // 64-bit
  def b64rr
    : NVPTXInst<(outs Int64Regs:$dst),
                (ins Int64Regs:$a, Int64Regs:$b),
                !strconcat(OpcStr, ".b64  \t$dst, $a, $b;"),
                [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
  def b64ri
    : NVPTXInst<(outs Int64Regs:$dst),
                (ins Int64Regs:$a, i64imm:$b),
                !strconcat(OpcStr, ".b64  \t$dst, $a, $b;"),
                [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
}

defm OR  : LOG_FORMAT<"or",  or>;
defm AND : LOG_FORMAT<"and", and>;
defm XOR : LOG_FORMAT<"xor", xor>;
1001
// Bitwise complement for the predicate, 16-, 32- and 64-bit register classes.
def NOT1
  : NVPTXInst<(outs Int1Regs:$dst),
              (ins Int1Regs:$src),
              "not.pred \t$dst, $src;",
              [(set Int1Regs:$dst, (not Int1Regs:$src))]>;

def NOT16
  : NVPTXInst<(outs Int16Regs:$dst),
              (ins Int16Regs:$src),
              "not.b16 \t$dst, $src;",
              [(set Int16Regs:$dst, (not Int16Regs:$src))]>;

def NOT32
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$src),
              "not.b32 \t$dst, $src;",
              [(set Int32Regs:$dst, (not Int32Regs:$src))]>;

def NOT64
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$src),
              "not.b64 \t$dst, $src;",
              [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
1014
// Left shifts. The shift amount (second source operand) is always a 32-bit
// value, whatever the width of the value being shifted.
//
//   OpcStr - opcode prefix; the bit width ("16"/"32"/"64") is appended.
//   OpNode - the DAG node selected by every variant.
multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
  // 64-bit value
  def i64rr
    : NVPTXInst<(outs Int64Regs:$dst),
                (ins Int64Regs:$a, Int32Regs:$b),
                !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int32Regs:$b))]>;
  def i64ri
    : NVPTXInst<(outs Int64Regs:$dst),
                (ins Int64Regs:$a, i32imm:$b),
                !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                [(set Int64Regs:$dst, (OpNode Int64Regs:$a, (i32 imm:$b)))]>;

  // 32-bit value; the ii form takes two immediates
  def i32rr
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$a, Int32Regs:$b),
                !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def i32ri
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$a, i32imm:$b),
                !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Int32Regs:$a, (i32 imm:$b)))]>;
  def i32ii
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins i32imm:$a, i32imm:$b),
                !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode (i32 imm:$a), (i32 imm:$b)))]>;

  // 16-bit value
  def i16rr
    : NVPTXInst<(outs Int16Regs:$dst),
                (ins Int16Regs:$a, Int32Regs:$b),
                !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int32Regs:$b))]>;
  def i16ri
    : NVPTXInst<(outs Int16Regs:$dst),
                (ins Int16Regs:$a, i32imm:$b),
                !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 imm:$b)))]>;
}

defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
1051
// Right shifts. The shift amount (second source operand) is always a 32-bit
// value, whatever the width of the value being shifted.
// Need to add cvt for the 8-bits.
//
//   OpcStr - opcode prefix; the bit width ("16"/"32"/"64") is appended.
//   OpNode - the DAG node selected by every variant.
multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
  // 64-bit value
  def i64rr
    : NVPTXInst<(outs Int64Regs:$dst),
                (ins Int64Regs:$a, Int32Regs:$b),
                !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int32Regs:$b))]>;
  def i64ri
    : NVPTXInst<(outs Int64Regs:$dst),
                (ins Int64Regs:$a, i32imm:$b),
                !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                [(set Int64Regs:$dst, (OpNode Int64Regs:$a, (i32 imm:$b)))]>;

  // 32-bit value; the ii form takes two immediates
  def i32rr
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$a, Int32Regs:$b),
                !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def i32ri
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$a, i32imm:$b),
                !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Int32Regs:$a, (i32 imm:$b)))]>;
  def i32ii
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins i32imm:$a, i32imm:$b),
                !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode (i32 imm:$a), (i32 imm:$b)))]>;

  // 16-bit value
  def i16rr
    : NVPTXInst<(outs Int16Regs:$dst),
                (ins Int16Regs:$a, Int32Regs:$b),
                !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int32Regs:$b))]>;
  def i16ri
    : NVPTXInst<(outs Int16Regs:$dst),
                (ins Int16Regs:$a, i32imm:$b),
                !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 imm:$b)))]>;
}

// Arithmetic (sign-propagating) and logical right shifts.
defm SRA : RSHIFT_FORMAT<"shr.s", sra>;
defm SRL : RSHIFT_FORMAT<"shr.u", srl>;
1090
//
// Rotate: use ptx shf instruction if available.
//
// All four definitions are guarded by hasHWROT32. The source register is
// passed as both data operands of shf.*.wrap.
//
let Predicates = [hasHWROT32] in {
  // 32 bit r2 = rotl r1, n
  //    =>
  //        r2 = shf.l r1, r1, n
  def ROTL32imm_hw
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$src, i32imm:$amt),
                "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
                [(set Int32Regs:$dst,
                      (rotl Int32Regs:$src, (i32 imm:$amt)))]>;

  def ROTL32reg_hw
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$src, Int32Regs:$amt),
                "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
                [(set Int32Regs:$dst,
                      (rotl Int32Regs:$src, Int32Regs:$amt))]>;

  // 32 bit r2 = rotr r1, n
  //    =>
  //        r2 = shf.r r1, r1, n
  def ROTR32imm_hw
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$src, i32imm:$amt),
                "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
                [(set Int32Regs:$dst,
                      (rotr Int32Regs:$src, (i32 imm:$amt)))]>;

  def ROTR32reg_hw
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$src, Int32Regs:$amt),
                "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
                [(set Int32Regs:$dst,
                      (rotr Int32Regs:$src, Int32Regs:$amt))]>;
}
1124
//
// Rotate: if ptx shf instruction is not available, then use shift+add
//
// 32bit
//
// Generic immediate rotate: dst = (src << amt1) + (src >> amt2). The
// instruction has no selection pattern of its own.
def ROT32imm_sw
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
              !strconcat("{{\n\t",
                         ".reg .b32 %lhs;\n\t",
                         ".reg .b32 %rhs;\n\t",
                         "shl.b32 \t%lhs, $src, $amt1;\n\t",
                         "shr.b32 \t%rhs, $src, $amt2;\n\t",
                         "add.u32 \t$dst, %lhs, %rhs;\n\t",
                         "}}"),
              []>;
1139
// Transforms an immediate N into the i32 target constant (32 - N).
def SUB_FRM_32 : SDNodeXForm<imm, [{
    uint64_t Complement = 32 - N->getZExtValue();
    return CurDAG->getTargetConstant(Complement, SDLoc(N), MVT::i32);
}]>;

// Immediate rotates without hardware support map onto ROT32imm_sw:
//   rotl by n: left shift by n,      right shift by 32 - n
//   rotr by n: left shift by 32 - n, right shift by n
let Predicates = [noHWROT32] in {
  def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
            (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;
  def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
            (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;
}
1150
// Software 32-bit rotate-left by a register amount (used when the shf
// instruction is not available, i.e. under noHWROT32):
//   dst = (src << n) + (src >> (32 - n)),  n = amt & 31
//
// The amount is reduced modulo 32 first. Without the mask, an amount >= 32
// makes both shifts clamp (PTX clamps shift amounts larger than the register
// width) and the result would be 0 instead of a rotation. For n == 0 the
// right shift is by 32, which yields 0, so dst == src as required.
def ROTL32reg_sw
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$src, Int32Regs:$amt),
              !strconcat("{{\n\t",
                         ".reg .b32 %lhs;\n\t",
                         ".reg .b32 %rhs;\n\t",
                         ".reg .b32 %amt2;\n\t",
                         "and.b32 \t%amt2, $amt, 31;\n\t",
                         "shl.b32 \t%lhs, $src, %amt2;\n\t",
                         "sub.s32 \t%amt2, 32, %amt2;\n\t",
                         "shr.b32 \t%rhs, $src, %amt2;\n\t",
                         "add.u32 \t$dst, %lhs, %rhs;\n\t",
                         "}}"),
              [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
    Requires<[noHWROT32]>;
1164
// Software 32-bit rotate-right by a register amount (used when the shf
// instruction is not available, i.e. under noHWROT32):
//   dst = (src >> n) + (src << (32 - n)),  n = amt & 31
//
// The amount is reduced modulo 32 first. Without the mask, an amount >= 32
// makes both shifts clamp (PTX clamps shift amounts larger than the register
// width) and the result would be 0 instead of a rotation. For n == 0 the
// left shift is by 32, which yields 0, so dst == src as required.
def ROTR32reg_sw
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$src, Int32Regs:$amt),
              !strconcat("{{\n\t",
                         ".reg .b32 %lhs;\n\t",
                         ".reg .b32 %rhs;\n\t",
                         ".reg .b32 %amt2;\n\t",
                         "and.b32 \t%amt2, $amt, 31;\n\t",
                         "shr.b32 \t%lhs, $src, %amt2;\n\t",
                         "sub.s32 \t%amt2, 32, %amt2;\n\t",
                         "shl.b32 \t%rhs, $src, %amt2;\n\t",
                         "add.u32 \t$dst, %lhs, %rhs;\n\t",
                         "}}"),
              [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
    Requires<[noHWROT32]>;
1178
// 64bit
//
// Generic immediate rotate: dst = (src << amt1) + (src >> amt2). The
// instruction has no selection pattern of its own.
def ROT64imm_sw
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$src, i32imm:$amt1, i32imm:$amt2),
              !strconcat("{{\n\t",
                         ".reg .b64 %lhs;\n\t",
                         ".reg .b64 %rhs;\n\t",
                         "shl.b64 \t%lhs, $src, $amt1;\n\t",
                         "shr.b64 \t%rhs, $src, $amt2;\n\t",
                         "add.u64 \t$dst, %lhs, %rhs;\n\t",
                         "}}"),
              []>;
1190
// Transforms an immediate N into the i32 target constant (64 - N).
def SUB_FRM_64 : SDNodeXForm<imm, [{
    uint64_t Complement = 64 - N->getZExtValue();
    return CurDAG->getTargetConstant(Complement, SDLoc(N), MVT::i32);
}]>;

// 64-bit immediate rotates map onto ROT64imm_sw:
//   rotl by n: left shift by n,      right shift by 64 - n
//   rotr by n: left shift by 64 - n, right shift by n
def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src,
                       imm:$amt,
                       (SUB_FRM_64 node:$amt))>;
def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src,
                       (SUB_FRM_64 node:$amt),
                       imm:$amt)>;
1199
// Software 64-bit rotate-left by a (32-bit) register amount:
//   dst = (src << n) + (src >> (64 - n)),  n = amt & 63
//
// The amount is reduced modulo 64 first. Without the mask, an amount >= 64
// makes both shifts clamp (PTX clamps shift amounts larger than the register
// width) and the result would be 0 instead of a rotation. For n == 0 the
// right shift is by 64, which yields 0, so dst == src as required.
def ROTL64reg_sw
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$src, Int32Regs:$amt),
              !strconcat("{{\n\t",
                         ".reg .b64 %lhs;\n\t",
                         ".reg .b64 %rhs;\n\t",
                         ".reg .u32 %amt2;\n\t",
                         "and.b32 \t%amt2, $amt, 63;\n\t",
                         "shl.b64 \t%lhs, $src, %amt2;\n\t",
                         "sub.u32 \t%amt2, 64, %amt2;\n\t",
                         "shr.b64 \t%rhs, $src, %amt2;\n\t",
                         "add.u64 \t$dst, %lhs, %rhs;\n\t",
                         "}}"),
              [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
1212
// Software 64-bit rotate-right by a (32-bit) register amount:
//   dst = (src >> n) + (src << (64 - n)),  n = amt & 63
//
// The amount is reduced modulo 64 first. Without the mask, an amount >= 64
// makes both shifts clamp (PTX clamps shift amounts larger than the register
// width) and the result would be 0 instead of a rotation. For n == 0 the
// left shift is by 64, which yields 0, so dst == src as required.
def ROTR64reg_sw
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$src, Int32Regs:$amt),
              !strconcat("{{\n\t",
                         ".reg .b64 %lhs;\n\t",
                         ".reg .b64 %rhs;\n\t",
                         ".reg .u32 %amt2;\n\t",
                         "and.b32 \t%amt2, $amt, 63;\n\t",
                         "shr.b64 \t%lhs, $src, %amt2;\n\t",
                         "sub.u32 \t%amt2, 64, %amt2;\n\t",
                         "shl.b64 \t%rhs, $src, %amt2;\n\t",
                         "add.u64 \t$dst, %lhs, %rhs;\n\t",
                         "}}"),
              [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
1225
// BFE - bit-field extract
//
//   TyStr - PTX type suffix appended to "bfe.".
//   RC    - register class of the extracted value and of the result.
//
// BFE supports both 32-bit and 64-bit values, but the start ($b) and length
// ($c) operands are always 32-bit. None of the variants carries a selection
// pattern.
multiclass BFE<string TyStr, RegisterClass RC> {
  // start and length in registers
  def rrr : NVPTXInst<(outs RC:$d),
                      (ins RC:$a, Int32Regs:$b, Int32Regs:$c),
                      !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"),
                      []>;
  // start in a register, immediate length
  def rri : NVPTXInst<(outs RC:$d),
                      (ins RC:$a, Int32Regs:$b, i32imm:$c),
                      !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"),
                      []>;
  // immediate start and length
  def rii : NVPTXInst<(outs RC:$d),
                      (ins RC:$a, i32imm:$b, i32imm:$c),
                      !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"),
                      []>;
}

// 32-bit signed / unsigned
defm BFE_S32 : BFE<"s32", Int32Regs>;
defm BFE_U32 : BFE<"u32", Int32Regs>;
// 64-bit signed / unsigned
defm BFE_S64 : BFE<"s64", Int64Regs>;
defm BFE_U64 : BFE<"u64", Int64Regs>;
1249
1250//-----------------------------------
1251// General Comparison
1252//-----------------------------------
1253
// General setp instructions: compare $a with $b and write a predicate.
//
//   TypeStr - PTX type suffix of the comparison.
//   RC      - register class of the register operands.
//   ImmCls  - operand class of the immediate operands.
//
// The comparison is carried in the CmpMode operand $cmp, which is printed
// through its "base" and "ftz" modifiers. No variant has a selection pattern.
multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
  // register, register
  def rr
    : NVPTXInst<(outs Int1Regs:$dst),
                (ins RC:$a, RC:$b, CmpMode:$cmp),
                !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
                           "\t$dst, $a, $b;"),
                []>;
  // register, immediate
  def ri
    : NVPTXInst<(outs Int1Regs:$dst),
                (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
                !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
                           "\t$dst, $a, $b;"),
                []>;
  // immediate, register
  def ir
    : NVPTXInst<(outs Int1Regs:$dst),
                (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
                !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
                           "\t$dst, $a, $b;"),
                []>;
}

// 16-bit integer
defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>;
defm SETP_s16 : SETP<"s16", Int16Regs, i16imm>;
defm SETP_u16 : SETP<"u16", Int16Regs, i16imm>;
// 32-bit integer
defm SETP_b32 : SETP<"b32", Int32Regs, i32imm>;
defm SETP_s32 : SETP<"s32", Int32Regs, i32imm>;
defm SETP_u32 : SETP<"u32", Int32Regs, i32imm>;
// 64-bit integer
defm SETP_b64 : SETP<"b64", Int64Regs, i64imm>;
defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>;
defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>;
// floating point
defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>;
defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>;
1281
// General set instructions: compare $a with $b and write the result to a
// 32-bit register.
//
//   TypeStr - PTX type suffix of the comparison.
//   RC      - register class of the register operands.
//   ImmCls  - operand class of the immediate operands.
//
// The comparison is carried in the CmpMode operand $cmp. No variant has a
// selection pattern.
multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
  // register, register
  def rr
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins RC:$a, RC:$b, CmpMode:$cmp),
                !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"),
                []>;
  // register, immediate
  def ri
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
                !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"),
                []>;
  // immediate, register
  def ir
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
                !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"),
                []>;
}

// 16-bit integer
defm SET_b16 : SET<"b16", Int16Regs, i16imm>;
defm SET_s16 : SET<"s16", Int16Regs, i16imm>;
defm SET_u16 : SET<"u16", Int16Regs, i16imm>;
// 32-bit integer
defm SET_b32 : SET<"b32", Int32Regs, i32imm>;
defm SET_s32 : SET<"s32", Int32Regs, i32imm>;
defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
// 64-bit integer
defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
// floating point
defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
1306
1307//-----------------------------------
1308// General Selection
1309//-----------------------------------
1310
// General selp instructions, without selection patterns.
//
//   TypeStr - PTX type suffix appended to "selp.".
//   RC      - register class of the result and of register operands.
//   ImmCls  - operand class of the immediate operands.
//
// $p is the predicate operand; variant names give the kinds of $a and $b
// (r = register, i = immediate).
multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
  def rr
    : NVPTXInst<(outs RC:$dst),
                (ins RC:$a, RC:$b, Int1Regs:$p),
                !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
                []>;
  def ri
    : NVPTXInst<(outs RC:$dst),
                (ins RC:$a, ImmCls:$b, Int1Regs:$p),
                !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
                []>;
  def ir
    : NVPTXInst<(outs RC:$dst),
                (ins ImmCls:$a, RC:$b, Int1Regs:$p),
                !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
                []>;
  def ii
    : NVPTXInst<(outs RC:$dst),
                (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
                !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
                []>;
}
1326
// selp instructions that also select the generic `select` node:
//   dst = select(p, a, b)
//
//   TypeStr - PTX type suffix appended to "selp.".
//   RC      - register class of the result and of register operands.
//   ImmCls  - operand class of the immediate operands.
//   ImmNode - DAG node matched for immediate operands (imm or fpimm).
//
// Variant names give the kinds of $a and $b (r = register, i = immediate).
multiclass SELP_PATTERN<string TypeStr, RegisterClass RC, Operand ImmCls,
                        SDNode ImmNode> {
  def rr
    : NVPTXInst<(outs RC:$dst),
                (ins RC:$a, RC:$b, Int1Regs:$p),
                !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
                [(set RC:$dst,
                      (select Int1Regs:$p, RC:$a, RC:$b))]>;
  def ri
    : NVPTXInst<(outs RC:$dst),
                (ins RC:$a, ImmCls:$b, Int1Regs:$p),
                !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
                [(set RC:$dst,
                      (select Int1Regs:$p, RC:$a, ImmNode:$b))]>;
  def ir
    : NVPTXInst<(outs RC:$dst),
                (ins ImmCls:$a, RC:$b, Int1Regs:$p),
                !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
                [(set RC:$dst,
                      (select Int1Regs:$p, ImmNode:$a, RC:$b))]>;
  def ii
    : NVPTXInst<(outs RC:$dst),
                (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
                !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
                [(set RC:$dst,
                      (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>;
}
1346
// selp instantiations. Only the .bNN and floating-point flavours carry
// `select` patterns (SELP_PATTERN); the signed/unsigned flavours are
// pattern-less (SELP).

// 16-bit integer
defm SELP_b16 : SELP_PATTERN<"b16", Int16Regs, i16imm, imm>;
defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>;
defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>;
// 32-bit integer
defm SELP_b32 : SELP_PATTERN<"b32", Int32Regs, i32imm, imm>;
defm SELP_s32 : SELP<"s32", Int32Regs, i32imm>;
defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>;
// 64-bit integer
defm SELP_b64 : SELP_PATTERN<"b64", Int64Regs, i64imm, imm>;
defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>;
defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
// floating point
defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>;
defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>;
1358
1359//
1360// Funnel shift in clamp mode
1361//
1362// - SDNodes are created so they can be used in the DAG code,
1363//   e.g. NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
1364//
// Type profile shared by both funnel-shift nodes: one result and three
// operands. Operands 1 and 2 have the same type as the result, the result is
// an integer, and operand 3 is an integer.
1365def SDTIntShiftDOp: SDTypeProfile<1, 3,
1366                                  [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
1367                                   SDTCisInt<0>, SDTCisInt<3>]>;
// Target nodes for the left and right clamp-mode funnel shifts; neither
// declares any node properties.
1368def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
1369def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;
1370
// Selects FUN_SHFL_CLAMP on 32-bit registers to "shf.l.clamp.b32".
// Operands are printed in the order $dst, $lo, $hi, $amt.
1371def FUNSHFLCLAMP : NVPTXInst<(outs Int32Regs:$dst),
1372                             (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
1373                  "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",
1374                  [(set Int32Regs:$dst,
1375                     (FUN_SHFL_CLAMP Int32Regs:$lo,
1376                        Int32Regs:$hi, Int32Regs:$amt))]>;
1377
// Selects FUN_SHFR_CLAMP on 32-bit registers to "shf.r.clamp.b32".
// Same operand layout as FUNSHFLCLAMP.
1378def FUNSHFRCLAMP : NVPTXInst<(outs Int32Regs:$dst),
1379                             (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
1380                  "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",
1381                  [(set Int32Regs:$dst,
1382                     (FUN_SHFR_CLAMP Int32Regs:$lo,
1383                        Int32Regs:$hi, Int32Regs:$amt))]>;
1384
1385//-----------------------------------
1386// Data Movement (Load / Store, Move)
1387//-----------------------------------
1388
// Address complex patterns for i32 and i64 addresses. Each yields 2 operands,
// is matched by the C++ selector named in the string (SelectADDRri /
// SelectADDRri64), lists frameindex as its root node and requests the root
// node via SDNPWantRoot.
1389def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
1390  [SDNPWantRoot]>;
1391def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
1392  [SDNPWantRoot]>;
1393
// 32-bit memory operand made of two sub-operands (Int32Regs, i32imm);
// printed by printMemOperand.
1394def MEMri : Operand<i32> {
1395  let PrintMethod = "printMemOperand";
1396  let MIOperandInfo = (ops Int32Regs, i32imm);
1397}
// 64-bit memory operand made of two sub-operands (Int64Regs, i64imm);
// printed by printMemOperand.
1398def MEMri64 : Operand<i64> {
1399  let PrintMethod = "printMemOperand";
1400  let MIOperandInfo = (ops Int64Regs, i64imm);
1401}
1402
// Pointer-typed (iPTR) operand printed by printOperand.
1403def imem : Operand<iPTR> {
1404    let PrintMethod = "printOperand";
1405}
1406
// iPTRAny-typed operand printed by printOperand.
1407def imemAny : Operand<iPTRAny> {
1408    let PrintMethod = "printOperand";
1409}
1410
// i32 operand printed by printLdStCode.
1411def LdStCode : Operand<i32> {
1412    let PrintMethod = "printLdStCode";
1413}
1414
// NVPTXISD::Wrapper: one result and one operand of the same type, and that
// type is a pointer type.
1415def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
1416def Wrapper    : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
1417
// Selects a Wrapper around a target global address into a 32-bit register
// with "mov.u32".
1418def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
1419                     "mov.u32 \t$dst, $a;",
1420                     [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
1421
// 64-bit counterpart of MOV_ADDR, printed as "mov.u64".
1422def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
1423                     "mov.u64 \t$dst, $a;",
1424                     [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
1425
1426// Get pointer to local stack
// Both variants print the symbol "__local_depot" followed by the immediate
// $num, and have no selection pattern.
1427def MOV_DEPOT_ADDR
1428  : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
1429              "mov.u32 \t$d, __local_depot$num;", []>;
1430def MOV_DEPOT_ADDR_64
1431  : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
1432              "mov.u64 \t$d, __local_depot$num;", []>;
1433
1434
1435// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
// Register-to-register moves for each register class. None has a selection
// pattern, and all are flagged IsSimpleMove=1.
1436let IsSimpleMove=1 in {
1437def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
1438                   "mov.pred \t$dst, $sss;", []>;
1439def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
1440                    "mov.u16 \t$dst, $sss;", []>;
1441def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
1442                    "mov.u32 \t$dst, $sss;", []>;
1443def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
1444                    "mov.u64 \t$dst, $sss;", []>;
1445
1446def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
1447                    "mov.f32 \t$dst, $src;", []>;
1448def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
1449                    "mov.f64 \t$dst, $src;", []>;
1450}
// Immediate-to-register moves. Each selects an integer immediate (imm) or a
// floating-point immediate (fpimm) into the matching register class. These
// are defined outside the IsSimpleMove block above.
1451def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
1452                    "mov.pred \t$dst, $src;",
1453          [(set Int1Regs:$dst, imm:$src)]>;
1454def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
1455                    "mov.u16 \t$dst, $src;",
1456          [(set Int16Regs:$dst, imm:$src)]>;
1457def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
1458                    "mov.u32 \t$dst, $src;",
1459          [(set Int32Regs:$dst, imm:$src)]>;
// NOTE(review): named IMOV64i rather than IMOV64ri like its siblings; the
// name may be referenced from C++, so it is left as is.
1460def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
1461                    "mov.u64 \t$dst, $src;",
1462          [(set Int64Regs:$dst, imm:$src)]>;
1463
1464def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
1465                    "mov.f32 \t$dst, $src;",
1466          [(set Float32Regs:$dst, fpimm:$src)]>;
1467def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
1468                    "mov.f64 \t$dst, $src;",
1469          [(set Float64Regs:$dst, fpimm:$src)]>;
1470
// An i32 Wrapper around a target external symbol is selected as IMOV32ri of
// that symbol.
1471def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
1472
1473//---- Copy Frame Index ----
// Selects an ADDRri / ADDRri64 address into a register with an add
// instruction. The memory operand is printed with the "add" modifier
// (${addr:add}).
1474def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
1475                        "add.u32 \t$dst, ${addr:add};",
1476                        [(set Int32Regs:$dst, ADDRri:$addr)]>;
1477def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
1478                        "add.u64 \t$dst, ${addr:add};",
1479                        [(set Int64Regs:$dst, ADDRri64:$addr)]>;
1480
1481//-----------------------------------
1482// Comparison and Selection
1483//-----------------------------------
1484
// ISET_FORMAT: selection patterns for one integer comparison node.
//   OpNode - the comparison PatFrag to match
//   Mode   - comparison-mode leaf passed as the last instruction operand
//   setp_* - instructions used when the comparison result type is i1
//   set_*  - instructions used when the comparison result type is i32
// Every instruction parameter comes in rr / ri / ir form for 16-, 32- and
// 64-bit operands, giving 18 anonymous patterns per instantiation.
1485multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
1486                       Instruction setp_16rr,
1487                       Instruction setp_16ri,
1488                       Instruction setp_16ir,
1489                       Instruction setp_32rr,
1490                       Instruction setp_32ri,
1491                       Instruction setp_32ir,
1492                       Instruction setp_64rr,
1493                       Instruction setp_64ri,
1494                       Instruction setp_64ir,
1495                       Instruction set_16rr,
1496                       Instruction set_16ri,
1497                       Instruction set_16ir,
1498                       Instruction set_32rr,
1499                       Instruction set_32ri,
1500                       Instruction set_32ir,
1501                       Instruction set_64rr,
1502                       Instruction set_64ri,
1503                       Instruction set_64ir> {
1504  // i16 -> pred
1505  def : Pat<(i1 (OpNode Int16Regs:$a, Int16Regs:$b)),
1506            (setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
1507  def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)),
1508            (setp_16ri Int16Regs:$a, imm:$b, Mode)>;
1509  def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)),
1510            (setp_16ir imm:$a, Int16Regs:$b, Mode)>;
1511  // i32 -> pred
1512  def : Pat<(i1 (OpNode Int32Regs:$a, Int32Regs:$b)),
1513            (setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
1514  def : Pat<(i1 (OpNode Int32Regs:$a, imm:$b)),
1515            (setp_32ri Int32Regs:$a, imm:$b, Mode)>;
1516  def : Pat<(i1 (OpNode imm:$a, Int32Regs:$b)),
1517            (setp_32ir imm:$a, Int32Regs:$b, Mode)>;
1518  // i64 -> pred
1519  def : Pat<(i1 (OpNode Int64Regs:$a, Int64Regs:$b)),
1520            (setp_64rr Int64Regs:$a, Int64Regs:$b, Mode)>;
1521  def : Pat<(i1 (OpNode Int64Regs:$a, imm:$b)),
1522            (setp_64ri Int64Regs:$a, imm:$b, Mode)>;
1523  def : Pat<(i1 (OpNode imm:$a, Int64Regs:$b)),
1524            (setp_64ir imm:$a, Int64Regs:$b, Mode)>;
1525
1526  // i16 -> i32
1527  def : Pat<(i32 (OpNode Int16Regs:$a, Int16Regs:$b)),
1528            (set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
1529  def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)),
1530            (set_16ri Int16Regs:$a, imm:$b, Mode)>;
1531  def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)),
1532            (set_16ir imm:$a, Int16Regs:$b, Mode)>;
1533  // i32 -> i32
1534  def : Pat<(i32 (OpNode Int32Regs:$a, Int32Regs:$b)),
1535            (set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
1536  def : Pat<(i32 (OpNode Int32Regs:$a, imm:$b)),
1537            (set_32ri Int32Regs:$a, imm:$b, Mode)>;
1538  def : Pat<(i32 (OpNode imm:$a, Int32Regs:$b)),
1539            (set_32ir imm:$a, Int32Regs:$b, Mode)>;
1540  // i64 -> i32
1541  def : Pat<(i32 (OpNode Int64Regs:$a, Int64Regs:$b)),
1542            (set_64rr Int64Regs:$a, Int64Regs:$b, Mode)>;
1543  def : Pat<(i32 (OpNode Int64Regs:$a, imm:$b)),
1544            (set_64ri Int64Regs:$a, imm:$b, Mode)>;
1545  def : Pat<(i32 (OpNode imm:$a, Int64Regs:$b)),
1546            (set_64ir imm:$a, Int64Regs:$b, Mode)>;
1547}
1548
// ISET_FORMAT specialised with the signed (SETP_s* / SET_s*) instructions.
// The anonymous PatLeaf in the body exists only so the multiclass body is
// not empty.
1549multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode>
1550  : ISET_FORMAT<OpNode, Mode,
1551                SETP_s16rr, SETP_s16ri, SETP_s16ir,
1552                SETP_s32rr, SETP_s32ri, SETP_s32ir,
1553                SETP_s64rr, SETP_s64ri, SETP_s64ir,
1554                SET_s16rr, SET_s16ri, SET_s16ir,
1555                SET_s32rr, SET_s32ri, SET_s32ir,
1556                SET_s64rr, SET_s64ri, SET_s64ir> {
1557  // TableGen doesn't like empty multiclasses
1558  def : PatLeaf<(i32 0)>;
1559}
1560
// ISET_FORMAT specialised with the unsigned (SETP_u* / SET_u*) instructions.
// The anonymous PatLeaf in the body exists only so the multiclass body is
// not empty.
1561multiclass ISET_FORMAT_UNSIGNED<PatFrag OpNode, PatLeaf Mode>
1562  : ISET_FORMAT<OpNode, Mode,
1563                SETP_u16rr, SETP_u16ri, SETP_u16ir,
1564                SETP_u32rr, SETP_u32ri, SETP_u32ir,
1565                SETP_u64rr, SETP_u64ri, SETP_u64ir,
1566                SET_u16rr, SET_u16ri, SET_u16ir,
1567                SET_u32rr, SET_u32ri, SET_u32ir,
1568                SET_u64rr, SET_u64ri, SET_u64ir> {
1569  // TableGen doesn't like empty multiclasses
1570  def : PatLeaf<(i32 0)>;
1571}
1572
// Integer comparison patterns. Each set* condition is mapped to the signed
// instructions and its setu* counterpart to the unsigned instructions; both
// members of a pair use the same Cmp* mode leaf.
1573defm : ISET_FORMAT_SIGNED<setgt, CmpGT>;
1574defm : ISET_FORMAT_UNSIGNED<setugt, CmpGT>;
1575defm : ISET_FORMAT_SIGNED<setlt, CmpLT>;
1576defm : ISET_FORMAT_UNSIGNED<setult, CmpLT>;
1577defm : ISET_FORMAT_SIGNED<setge, CmpGE>;
1578defm : ISET_FORMAT_UNSIGNED<setuge, CmpGE>;
1579defm : ISET_FORMAT_SIGNED<setle, CmpLE>;
1580defm : ISET_FORMAT_UNSIGNED<setule, CmpLE>;
1581defm : ISET_FORMAT_SIGNED<seteq, CmpEQ>;
1582defm : ISET_FORMAT_UNSIGNED<setueq, CmpEQ>;
1583defm : ISET_FORMAT_SIGNED<setne, CmpNE>;
1584defm : ISET_FORMAT_UNSIGNED<setune, CmpNE>;
1585
1586// i1 compares
// Inequality of two predicates is selected as their XOR; equality is the
// negated XOR. The setu* forms map to the same instructions.
1587def : Pat<(setne Int1Regs:$a, Int1Regs:$b),
1588          (XORb1rr Int1Regs:$a, Int1Regs:$b)>;
1589def : Pat<(setune Int1Regs:$a, Int1Regs:$b),
1590          (XORb1rr Int1Regs:$a, Int1Regs:$b)>;
1591
1592def : Pat<(seteq Int1Regs:$a, Int1Regs:$b),
1593          (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
1594def : Pat<(setueq Int1Regs:$a, Int1Regs:$b),
1595          (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
1596
1597// i1 compare -> i32
// The i32 result is -1 for "true" and 0 for "false", picked by selp on the
// XOR of the two predicates:
//   setne: XOR set   -> -1, otherwise  0
//   seteq: XOR set   ->  0, otherwise -1
// The second pattern previously matched setne as well, duplicating the first
// pattern's source DAG with the opposite result; it is the seteq case.
1598def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),
1599          (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
1600def : Pat<(i32 (seteq Int1Regs:$a, Int1Regs:$b)),
1601          (SELP_u32ii 0, -1, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
1602
1603
1604
// FSET_FORMAT: selection patterns for one floating-point comparison node.
//   OpNode  - the comparison PatFrag to match
//   Mode    - comparison-mode leaf used by default
//   ModeFTZ - comparison-mode leaf used for f32 when doF32FTZ holds
// The f32 patterns come in pairs: the ModeFTZ one, guarded by
// Requires<[doF32FTZ]>, followed by an unguarded one using Mode. The f64
// patterns always use Mode. Result type i1 selects SETP_*, i32 selects SET_*.
1605multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
1606  // f32 -> pred
1607  def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
1608            (SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
1609        Requires<[doF32FTZ]>;
1610  def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
1611            (SETP_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;
1612  def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)),
1613            (SETP_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
1614        Requires<[doF32FTZ]>;
1615  def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)),
1616            (SETP_f32ri Float32Regs:$a, fpimm:$b, Mode)>;
1617  def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)),
1618            (SETP_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
1619        Requires<[doF32FTZ]>;
1620  def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)),
1621            (SETP_f32ir fpimm:$a, Float32Regs:$b, Mode)>;
1622
1623  // f64 -> pred
1624  def : Pat<(i1 (OpNode Float64Regs:$a, Float64Regs:$b)),
1625            (SETP_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
1626  def : Pat<(i1 (OpNode Float64Regs:$a, fpimm:$b)),
1627            (SETP_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
1628  def : Pat<(i1 (OpNode fpimm:$a, Float64Regs:$b)),
1629            (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
1630
1631  // f32 -> i32
1632  def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
1633            (SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
1634        Requires<[doF32FTZ]>;
1635  def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
1636            (SET_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;
1637  def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)),
1638            (SET_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
1639        Requires<[doF32FTZ]>;
1640  def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)),
1641            (SET_f32ri Float32Regs:$a, fpimm:$b, Mode)>;
1642  def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)),
1643            (SET_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
1644        Requires<[doF32FTZ]>;
1645  def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)),
1646            (SET_f32ir fpimm:$a, Float32Regs:$b, Mode)>;
1647
1648  // f64 -> i32
1649  def : Pat<(i32 (OpNode Float64Regs:$a, Float64Regs:$b)),
1650            (SET_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
1651  def : Pat<(i32 (OpNode Float64Regs:$a, fpimm:$b)),
1652            (SET_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
1653  def : Pat<(i32 (OpNode fpimm:$a, Float64Regs:$b)),
1654            (SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
1655}
1656
// Ordered comparisons (seto*) use the plain Cmp* modes.
1657defm FSetOGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
1658defm FSetOLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>;
1659defm FSetOGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>;
1660defm FSetOLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>;
1661defm FSetOEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>;
1662defm FSetONE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>;
1663
// Unordered comparisons (setu*) use the Cmp*U modes.
1664defm FSetUGT : FSET_FORMAT<setugt, CmpGTU, CmpGTU_FTZ>;
1665defm FSetULT : FSET_FORMAT<setult, CmpLTU, CmpLTU_FTZ>;
1666defm FSetUGE : FSET_FORMAT<setuge, CmpGEU, CmpGEU_FTZ>;
1667defm FSetULE : FSET_FORMAT<setule, CmpLEU, CmpLEU_FTZ>;
1668defm FSetUEQ : FSET_FORMAT<setueq, CmpEQU, CmpEQU_FTZ>;
1669defm FSetUNE : FSET_FORMAT<setune, CmpNEU, CmpNEU_FTZ>;
1670
// Comparisons without an ordered/unordered prefix use the same modes as the
// ordered ones above.
1671defm FSetGT : FSET_FORMAT<setgt, CmpGT, CmpGT_FTZ>;
1672defm FSetLT : FSET_FORMAT<setlt, CmpLT, CmpLT_FTZ>;
1673defm FSetGE : FSET_FORMAT<setge, CmpGE, CmpGE_FTZ>;
1674defm FSetLE : FSET_FORMAT<setle, CmpLE, CmpLE_FTZ>;
1675defm FSetEQ : FSET_FORMAT<seteq, CmpEQ, CmpEQ_FTZ>;
1676defm FSetNE : FSET_FORMAT<setne, CmpNE, CmpNE_FTZ>;
1677
// seto / setuo map to the CmpNUM / CmpNAN modes.
1678defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
1679defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;
1680
1681//def ld_param         : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
1682//                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
1683
// Type profiles for the call / parameter / return-value nodes defined below.
// SDTypeProfile<R, N, C> declares R results, N operands and the constraint
// list C; constraint indices count results first, then operands.
1684def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
1685  SDTCisInt<2>]>;
1686def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>,
1687  SDTCisInt<1>, SDTCisInt<2>]>;
1688def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
1689def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
1690def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>;
1691def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
1692def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
1693def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
1694def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>;
1695def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>;
1696def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
1697def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
1698def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
1699def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
1700def SDTCallValProfile : SDTypeProfile<1, 0, []>;
1701def SDTMoveParamProfile : SDTypeProfile<1, 1, []>;
1702def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
1703def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
1704def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
1705def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
1706
// Target DAG nodes for declaring parameters, loading and storing parameter
// values, printing call sequences and storing return values. Most are
// chained, glued on both sides and marked as having side effects. The
// LoadParam* nodes are marked SDNPMayLoad instead of SDNPSideEffect, the
// StoreRetval* and RETURN nodes carry no glue, and MoveParam has no
// properties at all.
1707def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
1708                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1709def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam",
1710  SDTDeclareScalarParamProfile,
1711                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1712def DeclareRetParam : SDNode<"NVPTXISD::DeclareRetParam",
1713  SDTDeclareParamProfile,
1714                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1715def DeclareRet   : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
1716                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1717def LoadParam    : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
1718                         [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
1719def LoadParamV2  : SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile,
1720                         [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
1721def LoadParamV4  : SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
1722                         [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
1723def PrintCall    : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
1724                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1725def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
1726                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1727def StoreParam   : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
1728                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1729def StoreParamV2 : SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile,
1730                         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1731def StoreParamV4 : SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
1732                         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1733def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
1734                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1735def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
1736                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1737def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
1738                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1739def CallArg      : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
1740                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1741def LastCallArg  : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
1742                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1743def CallArgEnd   : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
1744                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1745def CallVoid     : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
1746                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1747def Prototype    : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
1748                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1749def CallVal      : SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
1750                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1751def MoveParam    : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile,
1752                         []>;
1753def StoreRetval  : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
1754                         [SDNPHasChain, SDNPSideEffect]>;
1755def StoreRetvalV2  : SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile,
1756                           [SDNPHasChain, SDNPSideEffect]>;
1757def StoreRetvalV4  : SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile,
1758                           [SDNPHasChain, SDNPSideEffect]>;
1759def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam",
1760  SDTPseudoUseParamProfile,
1761                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1762def RETURNNode   : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
1763                         [SDNPHasChain, SDNPSideEffect]>;
1764
// Scalar load from the "retval0" parameter space at byte offset $b:
//   ld.param<opstr> $dst, [retval0+$b];
// No selection pattern is attached.
1765class LoadParamMemInst<NVPTXRegClass regclass, string opstr>
1766  : NVPTXInst<(outs regclass:$dst),
1767              (ins i32imm:$b),
1768              !strconcat("ld.param", opstr, "\t$dst, [retval0+$b];"),
1769              []>;
1770
// Register-style return value read, printed as "mov<opstr> $dst, retval$b;".
// Selected from LoadParam whose first operand is the constant 0.
1771class LoadParamRegInst<NVPTXRegClass regclass, string opstr>
1772  : NVPTXInst<(outs regclass:$dst),
1773              (ins i32imm:$b),
1774              !strconcat("mov", opstr, "\t$dst, retval$b;"),
1775              [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
1776
// Two-element vector load from "retval0" at byte offset $b (ld.param.v2).
// No selection pattern is attached.
1777class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr>
1778  : NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
1779              !strconcat("ld.param.v2", opstr,
1780                         "\t{{$dst, $dst2}}, [retval0+$b];"), []>;
1781
// Four-element vector load from "retval0" at byte offset $b (ld.param.v4).
// No selection pattern is attached.
1782class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr>
1783  : NVPTXInst<(outs regclass:$dst, regclass:$dst2,
1784                    regclass:$dst3, regclass:$dst4),
1785              (ins i32imm:$b),
1786              !strconcat("ld.param.v4", opstr,
1787                         "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"), []>;
1788
// Scalar store of $val to outgoing parameter number $a at byte offset $b:
//   st.param<opstr> [param$a+$b], $val;
// No selection pattern is attached.
1789class StoreParamInst<NVPTXRegClass regclass, string opstr>
1790  : NVPTXInst<(outs),
1791              (ins regclass:$val, i32imm:$a, i32imm:$b),
1792              !strconcat("st.param", opstr, "\t[param$a+$b], $val;"),
1793              []>;
1794
// Two-element vector store to outgoing parameter number $a at byte offset $b
// (st.param.v2). No selection pattern is attached.
1795class StoreParamV2Inst<NVPTXRegClass regclass, string opstr>
1796  : NVPTXInst<(outs),
1797              (ins regclass:$val, regclass:$val2, i32imm:$a, i32imm:$b),
1798              !strconcat("st.param.v2", opstr,
1799                         "\t[param$a+$b], {{$val, $val2}};"),
1800              []>;
1801
// Four-element vector store to outgoing parameter number $a at byte offset $b
// (st.param.v4). No selection pattern is attached.
// The input operands are named $val, $val2, $val3, $val4 so that every name
// referenced by the asm string is declared. The previous list declared $val1
// and omitted $val4, leaving the asm string with an undefined operand.
1802class StoreParamV4Inst<NVPTXRegClass regclass, string opstr> :
1803      NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3,
1804                             regclass:$val4, i32imm:$a, i32imm:$b),
1805                !strconcat(!strconcat("st.param.v4", opstr),
1806                "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
1807                []>;
1808
// Scalar store of $val to "func_retval0" at byte offset $a:
//   st.param<opstr> [func_retval0+$a], $val;
// No selection pattern is attached.
1809class StoreRetvalInst<NVPTXRegClass regclass, string opstr>
1810  : NVPTXInst<(outs),
1811              (ins regclass:$val, i32imm:$a),
1812              !strconcat("st.param", opstr, "\t[func_retval0+$a], $val;"),
1813              []>;
1814
// Two-element vector store to "func_retval0" at byte offset $a
// (st.param.v2). No selection pattern is attached.
1815class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr>
1816  : NVPTXInst<(outs),
1817              (ins regclass:$val, regclass:$val2, i32imm:$a),
1818              !strconcat("st.param.v2", opstr,
1819                         "\t[func_retval0+$a], {{$val, $val2}};"), []>;
1820
// Four-element vector store to "func_retval0" at byte offset $a
// (st.param.v4). No selection pattern is attached.
1821class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr>
1822  : NVPTXInst<(outs),
1823              (ins regclass:$val, regclass:$val2,
1824                   regclass:$val3, regclass:$val4,
1825                   i32imm:$a),
1826              !strconcat("st.param.v4", opstr,
1827                         "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"), []>;
1828
// Operand-less instructions that print the opening of a "call" statement.
// The constant operand of PrintCall (0..8) selects how many retvalN names
// appear in the printed return list; 0 prints just "call ".
1829def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
1830"call (retval0), ",
1831                                [(PrintCall (i32 1))]>;
1832def PrintCallRetInst2 : NVPTXInst<(outs), (ins),
1833"call (retval0, retval1), ",
1834                                [(PrintCall (i32 2))]>;
1835def PrintCallRetInst3 : NVPTXInst<(outs), (ins),
1836"call (retval0, retval1, retval2), ",
1837                                [(PrintCall (i32 3))]>;
1838def PrintCallRetInst4 : NVPTXInst<(outs), (ins),
1839"call (retval0, retval1, retval2, retval3), ",
1840                                [(PrintCall (i32 4))]>;
1841def PrintCallRetInst5 : NVPTXInst<(outs), (ins),
1842"call (retval0, retval1, retval2, retval3, retval4), ",
1843                                [(PrintCall (i32 5))]>;
1844def PrintCallRetInst6 : NVPTXInst<(outs), (ins),
1845"call (retval0, retval1, retval2, retval3, retval4, retval5), ",
1846                                [(PrintCall (i32 6))]>;
1847def PrintCallRetInst7 : NVPTXInst<(outs), (ins),
1848"call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
1849                                [(PrintCall (i32 7))]>;
// The 8-value string is split in two and joined with !strconcat.
1850def PrintCallRetInst8 : NVPTXInst<(outs), (ins),
1851!strconcat("call (retval0, retval1, retval2, retval3, retval4",
1852           ", retval5, retval6, retval7), "),
1853                                [(PrintCall (i32 8))]>;
1854
1855def PrintCallNoRetInst : NVPTXInst<(outs), (ins), "call ",
1856                                [(PrintCall (i32 0))]>;
1857
// "call.uni" counterparts of the PrintCall instructions: same layout, selected
// from PrintCallUni with a constant operand of 0..8.
1858def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins),
1859"call.uni (retval0), ",
1860                                [(PrintCallUni (i32 1))]>;
1861def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins),
1862"call.uni (retval0, retval1), ",
1863                                [(PrintCallUni (i32 2))]>;
1864def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins),
1865"call.uni (retval0, retval1, retval2), ",
1866                                [(PrintCallUni (i32 3))]>;
1867def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins),
1868"call.uni (retval0, retval1, retval2, retval3), ",
1869                                [(PrintCallUni (i32 4))]>;
1870def PrintCallUniRetInst5 : NVPTXInst<(outs), (ins),
1871"call.uni (retval0, retval1, retval2, retval3, retval4), ",
1872                                [(PrintCallUni (i32 5))]>;
1873def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins),
1874"call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
1875                                [(PrintCallUni (i32 6))]>;
1876def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins),
1877"call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
1878                                [(PrintCallUni (i32 7))]>;
// The 8-value string is split in two and joined with !strconcat.
1879def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins),
1880!strconcat("call.uni (retval0, retval1, retval2, retval3, retval4",
1881           ", retval5, retval6, retval7), "),
1882                                [(PrintCallUni (i32 8))]>;
1883
1884def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ",
1885                                [(PrintCallUni (i32 0))]>;
1886
// ld.param instantiations for scalar, v2 and v4 loads. The I8 variants use
// the Int16Regs register class with the ".b8" type suffix. There are no v4
// variants for 64-bit types.
1887def LoadParamMemI64    : LoadParamMemInst<Int64Regs, ".b64">;
1888def LoadParamMemI32    : LoadParamMemInst<Int32Regs, ".b32">;
1889def LoadParamMemI16    : LoadParamMemInst<Int16Regs, ".b16">;
1890def LoadParamMemI8     : LoadParamMemInst<Int16Regs, ".b8">;
1891def LoadParamMemV2I64  : LoadParamV2MemInst<Int64Regs, ".b64">;
1892def LoadParamMemV2I32  : LoadParamV2MemInst<Int32Regs, ".b32">;
1893def LoadParamMemV2I16  : LoadParamV2MemInst<Int16Regs, ".b16">;
1894def LoadParamMemV2I8   : LoadParamV2MemInst<Int16Regs, ".b8">;
1895def LoadParamMemV4I32  : LoadParamV4MemInst<Int32Regs, ".b32">;
1896def LoadParamMemV4I16  : LoadParamV4MemInst<Int16Regs, ".b16">;
1897def LoadParamMemV4I8   : LoadParamV4MemInst<Int16Regs, ".b8">;
1898def LoadParamMemF32    : LoadParamMemInst<Float32Regs, ".f32">;
1899def LoadParamMemF64    : LoadParamMemInst<Float64Regs, ".f64">;
1900def LoadParamMemV2F32  : LoadParamV2MemInst<Float32Regs, ".f32">;
1901def LoadParamMemV2F64  : LoadParamV2MemInst<Float64Regs, ".f64">;
1902def LoadParamMemV4F32  : LoadParamV4MemInst<Float32Regs, ".f32">;
1903
// Integer st.param instantiations for scalar and v2 stores. The I8 variants
// use the Int16Regs register class with the ".b8" type suffix.
1904def StoreParamI64    : StoreParamInst<Int64Regs, ".b64">;
1905def StoreParamI32    : StoreParamInst<Int32Regs, ".b32">;
1906
1907def StoreParamI16    : StoreParamInst<Int16Regs, ".b16">;
1908def StoreParamI8     : StoreParamInst<Int16Regs, ".b8">;
1909def StoreParamV2I64  : StoreParamV2Inst<Int64Regs, ".b64">;
1910def StoreParamV2I32  : StoreParamV2Inst<Int32Regs, ".b32">;
1911def StoreParamV2I16  : StoreParamV2Inst<Int16Regs, ".b16">;
1912def StoreParamV2I8   : StoreParamV2Inst<Int16Regs, ".b8">;
1913
1914// FIXME: StoreParamV4Inst crashes llvm-tblgen :(
1915//def StoreParamV4I32    : StoreParamV4Inst<Int32Regs, ".b32">;
// The integer v4 parameter stores are therefore written out by hand instead
// of instantiating the class. Operands are $val, $val2, $val3, $val4, then
// the parameter number $a and byte offset $b.
1916def StoreParamV4I32    : NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2,
1917                                               Int32Regs:$val3, Int32Regs:$val4,
1918                                                i32imm:$a, i32imm:$b),
1919                "st.param.v4.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
1920                         []>;
1921
1922def StoreParamV4I16    : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2,
1923                                               Int16Regs:$val3, Int16Regs:$val4,
1924                                                i32imm:$a, i32imm:$b),
1925                "st.param.v4.b16\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
1926                         []>;
1927
// 8-bit variant: Int16Regs operands with the ".b8" type suffix.
1928def StoreParamV4I8     : NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2,
1929                                                Int16Regs:$val3, Int16Regs:$val4,
1930                                                i32imm:$a, i32imm:$b),
1931                 "st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
1932                         []>;
1933
// Floating-point st.param instantiations: scalar and v2 for f32/f64, plus a
// hand-written v4 f32 store.
1934def StoreParamF32    : StoreParamInst<Float32Regs, ".f32">;
1935def StoreParamF64    : StoreParamInst<Float64Regs, ".f64">;
1936def StoreParamV2F32    : StoreParamV2Inst<Float32Regs, ".f32">;
1937def StoreParamV2F64    : StoreParamV2Inst<Float64Regs, ".f64">;
1938// FIXME: StoreParamV4Inst crashes llvm-tblgen :(
1939//def StoreParamV4F32    : StoreParamV4Inst<Float32Regs, ".f32">;
1940def StoreParamV4F32    : NVPTXInst<(outs),
1941                                   (ins Float32Regs:$val, Float32Regs:$val2,
1942                                        Float32Regs:$val3, Float32Regs:$val4,
1943                                        i32imm:$a, i32imm:$b),
1944                "st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
1945                        []>;
1946
1947
// Return-value store instantiations for scalar, v2 and v4 stores. The I8
// variants use the Int16Regs register class with the ".b8" type suffix.
// There are no v4 variants for 64-bit types.
1948def StoreRetvalI64    : StoreRetvalInst<Int64Regs, ".b64">;
1949def StoreRetvalI32    : StoreRetvalInst<Int32Regs, ".b32">;
1950def StoreRetvalI16    : StoreRetvalInst<Int16Regs, ".b16">;
1951def StoreRetvalI8     : StoreRetvalInst<Int16Regs, ".b8">;
1952def StoreRetvalV2I64  : StoreRetvalV2Inst<Int64Regs, ".b64">;
1953def StoreRetvalV2I32  : StoreRetvalV2Inst<Int32Regs, ".b32">;
1954def StoreRetvalV2I16  : StoreRetvalV2Inst<Int16Regs, ".b16">;
1955def StoreRetvalV2I8   : StoreRetvalV2Inst<Int16Regs, ".b8">;
1956def StoreRetvalV4I32  : StoreRetvalV4Inst<Int32Regs, ".b32">;
1957def StoreRetvalV4I16  : StoreRetvalV4Inst<Int16Regs, ".b16">;
1958def StoreRetvalV4I8   : StoreRetvalV4Inst<Int16Regs, ".b8">;
1959
1960def StoreRetvalF64    : StoreRetvalInst<Float64Regs, ".f64">;
1961def StoreRetvalF32    : StoreRetvalInst<Float32Regs, ".f32">;
1962def StoreRetvalV2F64  : StoreRetvalV2Inst<Float64Regs, ".f64">;
1963def StoreRetvalV2F32  : StoreRetvalV2Inst<Float32Regs, ".f32">;
1964def StoreRetvalV4F32  : StoreRetvalV4Inst<Float32Regs, ".f32">;
1965
// Operand-less instructions that print the punctuation around a call's
// argument list: "(" opens it; CallArgEnd with constant 1 prints ");" and
// with constant 0 prints ")". RETURNInst prints "ret;".
1966def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
1967def CallArgEndInst1  : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
1968def CallArgEndInst0  : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
1969def RETURNInst       : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
1970
// A non-final call argument held in a register of class `regclass`.
// Prints the register followed by ", " and matches CallArg with a leading
// (i32 0) operand.
class CallArgInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs),
              (ins regclass:$a),
              "$a, ",
              [(CallArg (i32 0), regclass:$a)]>;
1974
// The final call argument held in a register of class `regclass`.
// Prints the register with no trailing separator and matches LastCallArg with
// a leading (i32 0) operand.
class LastCallArgInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs),
              (ins regclass:$a),
              "$a",
              [(LastCallArg (i32 0), regclass:$a)]>;
1978
// Register call arguments (non-final position), one per register class.
def CallArgI64 : CallArgInst<Int64Regs>;
def CallArgI32 : CallArgInst<Int32Regs>;
def CallArgI16 : CallArgInst<Int16Regs>;

def CallArgF64 : CallArgInst<Float64Regs>;
def CallArgF32 : CallArgInst<Float32Regs>;

// Register call arguments (final position), one per register class.
def LastCallArgI64 : LastCallArgInst<Int64Regs>;
def LastCallArgI32 : LastCallArgInst<Int32Regs>;
def LastCallArgI16 : LastCallArgInst<Int16Regs>;

def LastCallArgF64 : LastCallArgInst<Float64Regs>;
def LastCallArgF32 : LastCallArgInst<Float32Regs>;
1992
// Immediate call arguments: the first (i32 0) operand is the same selector
// used by the register forms; the immediate itself is printed verbatim.
def CallArgI32imm :
  NVPTXInst<(outs), (ins i32imm:$a),
            "$a, ",
            [(CallArg (i32 0), (i32 imm:$a))]>;
def LastCallArgI32imm :
  NVPTXInst<(outs), (ins i32imm:$a),
            "$a",
            [(LastCallArg (i32 0), (i32 imm:$a))]>;

// Selector (i32 1): the immediate is printed as the suffix of a "param<N>"
// name rather than as a literal value.
def CallArgParam :
  NVPTXInst<(outs), (ins i32imm:$a),
            "param$a, ",
            [(CallArg (i32 1), (i32 imm:$a))]>;
def LastCallArgParam :
  NVPTXInst<(outs), (ins i32imm:$a),
            "param$a",
            [(LastCallArg (i32 1), (i32 imm:$a))]>;
2002
// Callee operand of a call.  Three forms are provided: a wrapped global
// address, a 32-bit register, and a 64-bit register.  Each prints the callee
// followed by ", ".
def CallVoidInst :
  NVPTXInst<(outs), (ins imem:$addr),
            "$addr, ",
            [(CallVoid (Wrapper tglobaladdr:$addr))]>;
def CallVoidInstReg :
  NVPTXInst<(outs), (ins Int32Regs:$addr),
            "$addr, ",
            [(CallVoid Int32Regs:$addr)]>;
def CallVoidInstReg64 :
  NVPTXInst<(outs), (ins Int64Regs:$addr),
            "$addr, ",
            [(CallVoid Int64Regs:$addr)]>;

// Prints the ", prototype_<N>;" tail, where <N> is the immediate operand.
def PrototypeInst :
  NVPTXInst<(outs), (ins i32imm:$val),
            ", prototype_$val;",
            [(Prototype (i32 imm:$val))]>;
2015
// Declarations of the "retval<N>" object.
//  - DeclareRetMemInst:    aligned .b8 array of $size elements in .param.
//  - DeclareRetScalarInst: scalar .b<size> in .param (DeclareRet selector 1).
//  - DeclareRetRegInst:    scalar .b<size> in .reg   (DeclareRet selector 2).
def DeclareRetMemInst :
  NVPTXInst<(outs),
            (ins i32imm:$align, i32imm:$size, i32imm:$num),
            ".param .align $align .b8 retval$num[$size];",
            [(DeclareRetParam (i32 imm:$align), (i32 imm:$size),
                              (i32 imm:$num))]>;
def DeclareRetScalarInst :
  NVPTXInst<(outs),
            (ins i32imm:$size, i32imm:$num),
            ".param .b$size retval$num;",
            [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
def DeclareRetRegInst :
  NVPTXInst<(outs),
            (ins i32imm:$size, i32imm:$num),
            ".reg .b$size retval$num;",
            [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
2026
// Declarations of the "param<N>" objects used for outgoing call arguments.
//  - DeclareParamInst:       aligned .b8 array of $size elements in .param.
//  - DeclareScalarParamInst: scalar .b<size> in .param (trailing selector 0).
//  - DeclareScalarRegInst:   scalar .b<size> in .reg   (trailing selector 1).
def DeclareParamInst :
  NVPTXInst<(outs),
            (ins i32imm:$align, i32imm:$a, i32imm:$size),
            ".param .align $align .b8 param$a[$size];",
            [(DeclareParam (i32 imm:$align), (i32 imm:$a),
                           (i32 imm:$size))]>;
def DeclareScalarParamInst :
  NVPTXInst<(outs),
            (ins i32imm:$a, i32imm:$size),
            ".param .b$size param$a;",
            [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
def DeclareScalarRegInst :
  NVPTXInst<(outs),
            (ins i32imm:$a, i32imm:$size),
            ".reg .b$size param$a;",
            [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
2037
// Register-to-register move selected for the MoveParam node.
//   regclass - register class of both source and destination.
//   asmstr   - type suffix appended to "mov" (e.g. ".b32").
class MoveParamInst<NVPTXRegClass regclass, string asmstr>
  : NVPTXInst<(outs regclass:$dst),
              (ins regclass:$src),
              !strconcat("mov", !strconcat(asmstr, "\t$dst, $src;")),
              [(set regclass:$dst, (MoveParam regclass:$src))]>;
2042
// MoveParam instantiations.  The 16-bit integer form does not use the
// MoveParamInst class: it is emitted as "cvt.u16.u32" instead of a "mov".
def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
def MoveParamI16 :
  NVPTXInst<(outs Int16Regs:$dst),
            (ins Int16Regs:$src),
            "cvt.u16.u32\t$dst, $src;",
            [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
2050
// Selected for the PseudoUseParam node.  It has no outputs and its assembly
// text is only a "//" comment naming the source register.
class PseudoUseParamInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs),
              (ins regclass:$src),
              "// Pseudo use of $src",
              [(PseudoUseParam regclass:$src)]>;
2055
// One pseudo-use instruction per scalar register class.
def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
2061
2062
//
// Load / Store Handling
//
// Scalar load into a register of class `regclass`.  Every variant takes the
// same leading modifier operands ($isVol, $addsp, $Vec, $Sign, $fromWidth),
// which are printed into the opcode, and differs only in how the address is
// given:
//   _avar     imem operand
//   _areg     32-bit register
//   _areg_64  64-bit register
//   _ari      32-bit register + immediate offset
//   _ari_64   64-bit register + immediate offset
//   _asi      imem operand + immediate offset
// None of the variants carries a selection pattern.
multiclass LD<NVPTXRegClass regclass> {
  def _avar : NVPTXInst<
      (outs regclass:$dst),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t$dst, [$addr];",
      []>;
  def _areg : NVPTXInst<
      (outs regclass:$dst),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t$dst, [$addr];",
      []>;
  def _areg_64 : NVPTXInst<
      (outs regclass:$dst),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t$dst, [$addr];",
      []>;
  def _ari : NVPTXInst<
      (outs regclass:$dst),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t$dst, [$addr+$offset];",
      []>;
  def _ari_64 : NVPTXInst<
      (outs regclass:$dst),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t$dst, [$addr+$offset];",
      []>;
  def _asi : NVPTXInst<
      (outs regclass:$dst),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr, i32imm:$offset),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t$dst, [$addr+$offset];",
      []>;
}
2098
// Scalar load instantiations.  All are marked as loads without other side
// effects.  The i8 form shares Int16Regs with the i16 form.
let mayLoad = 1, hasSideEffects = 0 in {
  defm LD_i8  : LD<Int16Regs>;
  defm LD_i16 : LD<Int16Regs>;
  defm LD_i32 : LD<Int32Regs>;
  defm LD_i64 : LD<Int64Regs>;
  defm LD_f32 : LD<Float32Regs>;
  defm LD_f64 : LD<Float64Regs>;
}
2107
// Scalar store of a register of class `regclass`.  The value operand comes
// first, followed by the modifier operands ($isVol, $addsp, $Vec, $Sign,
// $toWidth) that are printed into the opcode.  The variants differ only in
// how the address is given:
//   _avar     imem operand
//   _areg     32-bit register
//   _areg_64  64-bit register
//   _ari      32-bit register + immediate offset
//   _ari_64   64-bit register + immediate offset
//   _asi      imem operand + immediate offset
// None of the variants carries a selection pattern.
multiclass ST<NVPTXRegClass regclass> {
  def _avar : NVPTXInst<
      (outs),
      (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
           LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
      " \t[$addr], $src;",
      []>;
  def _areg : NVPTXInst<
      (outs),
      (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
           LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
      " \t[$addr], $src;",
      []>;
  def _areg_64 : NVPTXInst<
      (outs),
      (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
           LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
      " \t[$addr], $src;",
      []>;
  def _ari : NVPTXInst<
      (outs),
      (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
           LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
      " \t[$addr+$offset], $src;",
      []>;
  def _ari_64 : NVPTXInst<
      (outs),
      (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
           LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
      " \t[$addr+$offset], $src;",
      []>;
  def _asi : NVPTXInst<
      (outs),
      (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
           LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
      " \t[$addr+$offset], $src;",
      []>;
}
2140
// Scalar store instantiations.  All are marked as stores without other side
// effects.  The i8 form shares Int16Regs with the i16 form.
let mayStore = 1, hasSideEffects = 0 in {
  defm ST_i8  : ST<Int16Regs>;
  defm ST_i16 : ST<Int16Regs>;
  defm ST_i32 : ST<Int32Regs>;
  defm ST_i64 : ST<Int64Regs>;
  defm ST_f32 : ST<Float32Regs>;
  defm ST_f64 : ST<Float64Regs>;
}
2149
// The vector load/store instructions below are used only during and after
// vector elementization.  Elementization happens at the machine-instruction
// level, so these instructions never appear in the DAG (hence the empty
// pattern lists).
//
// Vector load producing two (_v2_*) or four (_v4_*) registers of class
// `regclass`.  The address forms mirror the scalar loads:
//   avar     imem operand
//   areg     32-bit register
//   areg_64  64-bit register
//   ari      32-bit register + immediate offset
//   ari_64   64-bit register + immediate offset
//   asi      imem operand + immediate offset
multiclass LD_VEC<NVPTXRegClass regclass> {
  // Two-element forms.
  def _v2_avar : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t{{$dst1, $dst2}}, [$addr];",
      []>;
  def _v2_areg : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t{{$dst1, $dst2}}, [$addr];",
      []>;
  def _v2_areg_64 : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t{{$dst1, $dst2}}, [$addr];",
      []>;
  def _v2_ari : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t{{$dst1, $dst2}}, [$addr+$offset];",
      []>;
  def _v2_ari_64 : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t{{$dst1, $dst2}}, [$addr+$offset];",
      []>;
  def _v2_asi : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr, i32imm:$offset),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t{{$dst1, $dst2}}, [$addr+$offset];",
      []>;

  // Four-element forms.
  def _v4_avar : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];",
      []>;
  def _v4_areg : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];",
      []>;
  def _v4_areg_64 : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];",
      []>;
  def _v4_ari : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];",
      []>;
  def _v4_ari_64 : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];",
      []>;
  def _v4_asi : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr, i32imm:$offset),
      "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];",
      []>;
}
// Vector load instantiations.  All are marked as loads without other side
// effects.  The i8 form shares Int16Regs with the i16 form.
let mayLoad = 1, hasSideEffects = 0 in {
  defm LDV_i8  : LD_VEC<Int16Regs>;
  defm LDV_i16 : LD_VEC<Int16Regs>;
  defm LDV_i32 : LD_VEC<Int32Regs>;
  defm LDV_i64 : LD_VEC<Int64Regs>;
  defm LDV_f32 : LD_VEC<Float32Regs>;
  defm LDV_f64 : LD_VEC<Float64Regs>;
}
2233
// Vector store of two (_v2_*) or four (_v4_*) registers of class `regclass`.
// The value operands come first, followed by the modifier operands that are
// printed into the opcode.  Note that the width operand is named $fromWidth
// here (the scalar stores call it $toWidth).  Address forms:
//   avar     imem operand
//   areg     32-bit register
//   areg_64  64-bit register
//   ari      32-bit register + immediate offset
//   ari_64   64-bit register + immediate offset
//   asi      imem operand + immediate offset
// None of the variants carries a selection pattern.
multiclass ST_VEC<NVPTXRegClass regclass> {
  // Two-element forms.
  def _v2_avar : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t[$addr], {{$src1, $src2}};",
      []>;
  def _v2_areg : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t[$addr], {{$src1, $src2}};",
      []>;
  def _v2_areg_64 : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t[$addr], {{$src1, $src2}};",
      []>;
  def _v2_ari : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t[$addr+$offset], {{$src1, $src2}};",
      []>;
  def _v2_ari_64 : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t[$addr+$offset], {{$src1, $src2}};",
      []>;
  def _v2_asi : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr, i32imm:$offset),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t[$addr+$offset], {{$src1, $src2}};",
      []>;

  // Four-element forms.
  def _v4_avar : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t[$addr], {{$src1, $src2, $src3, $src4}};",
      []>;
  def _v4_areg : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t[$addr], {{$src1, $src2, $src3, $src4}};",
      []>;
  def _v4_areg_64 : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t[$addr], {{$src1, $src2, $src3, $src4}};",
      []>;
  def _v4_ari : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};",
      []>;
  def _v4_ari_64 : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};",
      []>;
  def _v4_asi : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr, i32imm:$offset),
      "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth"
      " \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};",
      []>;
}
// Vector store instantiations.  All are marked as stores without other side
// effects.  The i8 form shares Int16Regs with the i16 form.
let mayStore = 1, hasSideEffects = 0 in {
  defm STV_i8  : ST_VEC<Int16Regs>;
  defm STV_i16 : ST_VEC<Int16Regs>;
  defm STV_i32 : ST_VEC<Int32Regs>;
  defm STV_i64 : ST_VEC<Int64Regs>;
  defm STV_f32 : ST_VEC<Float32Regs>;
  defm STV_f64 : ST_VEC<Float64Regs>;
}
2316
2317
2318//---- Conversion ----
2319
// Bit-preserving move between two register classes of the same width,
// selected for the `bitconvert` node.
//   SzStr       - width in bits, appended to "mov.b" (e.g. "32").
//   regclassIn  - register class of the source operand.
//   regclassOut - register class of the result.
class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
                   NVPTXRegClass regclassOut>
  : NVPTXInst<(outs regclassOut:$d),
              (ins regclassIn:$a),
              !strconcat(!strconcat("mov.b", SzStr), " \t $d, $a;"),
              [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
2325
// Integer <-> floating-point bitcasts at 32 and 64 bits.
def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs,   Float32Regs>;
def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs,   Float64Regs>;
def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
2330
// NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where
// we cannot specify floating-point literals in isel patterns.  Therefore, we
// use an integer selp to materialize the predicate as an integer and then cvt
// to floating-point.
//
// For the signed conversions an i1 "true" is the signed value -1, so the
// predicate is widened with a signed -1/0 select (the same select the
// `sext i1` patterns in this file use).  For the unsigned conversions "true"
// is 1, so a 1/0 select is used.

// sint -> f32
def : Pat<(f32 (sint_to_fp Int1Regs:$a)),
          (CVT_f32_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f32 (sint_to_fp Int16Regs:$a)),
          (CVT_f32_s16 Int16Regs:$a, CvtRN)>;
def : Pat<(f32 (sint_to_fp Int32Regs:$a)),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(f32 (sint_to_fp Int64Regs:$a)),
          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;

// uint -> f32
def : Pat<(f32 (uint_to_fp Int1Regs:$a)),
          (CVT_f32_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f32 (uint_to_fp Int16Regs:$a)),
          (CVT_f32_u16 Int16Regs:$a, CvtRN)>;
def : Pat<(f32 (uint_to_fp Int32Regs:$a)),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(f32 (uint_to_fp Int64Regs:$a)),
          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;

// sint -> f64
def : Pat<(f64 (sint_to_fp Int1Regs:$a)),
          (CVT_f64_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f64 (sint_to_fp Int16Regs:$a)),
          (CVT_f64_s16 Int16Regs:$a, CvtRN)>;
def : Pat<(f64 (sint_to_fp Int32Regs:$a)),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(f64 (sint_to_fp Int64Regs:$a)),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;

// uint -> f64
def : Pat<(f64 (uint_to_fp Int1Regs:$a)),
          (CVT_f64_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f64 (uint_to_fp Int16Regs:$a)),
          (CVT_f64_u16 Int16Regs:$a, CvtRN)>;
def : Pat<(f64 (uint_to_fp Int32Regs:$a)),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(f64 (uint_to_fp Int64Regs:$a)),
          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
2374
2375
// Floating-point -> integer conversions.
//
// For f32 sources, every i16/i32/i64 result has two patterns: one guarded by
// doF32FTZ that uses the CvtRZI_FTZ mode, followed by an unguarded one that
// uses CvtRZI.  f64 sources only have the CvtRZI form.
//
// The i1 results are not produced by a cvt: the source is bit-cast to an
// integer and compared against 0 with CmpEQ.
// NOTE(review): with CmpEQ the predicate is set when the bit pattern is all
// zero -- confirm that this is the intended polarity.

// f32 -> sint
def : Pat<(i1 (fp_to_sint Float32Regs:$a)),
          (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint Float32Regs:$a)),
          (CVT_s16_f32 Float32Regs:$a, CvtRZI_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_sint Float32Regs:$a)),
          (CVT_s16_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint Float32Regs:$a)),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(i32 (fp_to_sint Float32Regs:$a)),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint Float32Regs:$a)),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(i64 (fp_to_sint Float32Regs:$a)),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;

// f32 -> uint
def : Pat<(i1 (fp_to_uint Float32Regs:$a)),
          (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint Float32Regs:$a)),
          (CVT_u16_f32 Float32Regs:$a, CvtRZI_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_uint Float32Regs:$a)),
          (CVT_u16_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint Float32Regs:$a)),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(i32 (fp_to_uint Float32Regs:$a)),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint Float32Regs:$a)),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>,
      Requires<[doF32FTZ]>;
def : Pat<(i64 (fp_to_uint Float32Regs:$a)),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;

// f64 -> sint
def : Pat<(i1 (fp_to_sint Float64Regs:$a)),
          (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint Float64Regs:$a)),
          (CVT_s16_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint Float64Regs:$a)),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint Float64Regs:$a)),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;

// f64 -> uint
def : Pat<(i1 (fp_to_uint Float64Regs:$a)),
          (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint Float64Regs:$a)),
          (CVT_u16_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint Float64Regs:$a)),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint Float64Regs:$a)),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
2427
// Extensions from i1 are implemented as a select between two immediates,
// keyed on the predicate register.

// sext i1: true -> -1, false -> 0
def : Pat<(i16 (sext Int1Regs:$a)), (SELP_s16ii -1, 0, Int1Regs:$a)>;
def : Pat<(i32 (sext Int1Regs:$a)), (SELP_s32ii -1, 0, Int1Regs:$a)>;
def : Pat<(i64 (sext Int1Regs:$a)), (SELP_s64ii -1, 0, Int1Regs:$a)>;

// zext i1: true -> 1, false -> 0
def : Pat<(i16 (zext Int1Regs:$a)), (SELP_u16ii 1, 0, Int1Regs:$a)>;
def : Pat<(i32 (zext Int1Regs:$a)), (SELP_u32ii 1, 0, Int1Regs:$a)>;
def : Pat<(i64 (zext Int1Regs:$a)), (SELP_u64ii 1, 0, Int1Regs:$a)>;

// anyext i1: true -> -1, false -> 0 (using the unsigned select opcodes)
def : Pat<(i16 (anyext Int1Regs:$a)), (SELP_u16ii -1, 0, Int1Regs:$a)>;
def : Pat<(i32 (anyext Int1Regs:$a)), (SELP_u32ii -1, 0, Int1Regs:$a)>;
def : Pat<(i64 (anyext Int1Regs:$a)), (SELP_u64ii -1, 0, Int1Regs:$a)>;
2451
// Extensions from i16 and i32 are implemented with cvt in CvtNONE mode.
// sext uses the signed cvt; zext and anyext both use the unsigned cvt.

// sext i16
def : Pat<(i32 (sext Int16Regs:$a)), (CVT_s32_s16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (sext Int16Regs:$a)), (CVT_s64_s16 Int16Regs:$a, CvtNONE)>;

// zext i16
def : Pat<(i32 (zext Int16Regs:$a)), (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (zext Int16Regs:$a)), (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;

// anyext i16
def : Pat<(i32 (anyext Int16Regs:$a)), (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (anyext Int16Regs:$a)), (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;

// sext i32
def : Pat<(i64 (sext Int32Regs:$a)), (CVT_s64_s32 Int32Regs:$a, CvtNONE)>;

// zext i32
def : Pat<(i64 (zext Int32Regs:$a)), (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;

// anyext i32
def : Pat<(i64 (anyext Int32Regs:$a)), (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;
2481
2482
// Truncations.  Results of 16 or 32 bits use an unsigned cvt in CvtNONE mode.
// Truncation to i1 masks bit 0 of the source and compares the result with 1.

// truncate i64
def : Pat<(i32 (trunc Int64Regs:$a)), (CVT_u32_u64 Int64Regs:$a, CvtNONE)>;
def : Pat<(i16 (trunc Int64Regs:$a)), (CVT_u16_u64 Int64Regs:$a, CvtNONE)>;
def : Pat<(i1 (trunc Int64Regs:$a)),
          (SETP_b64ri (ANDb64ri Int64Regs:$a, 1), 1, CmpEQ)>;

// truncate i32
def : Pat<(i16 (trunc Int32Regs:$a)), (CVT_u16_u32 Int32Regs:$a, CvtNONE)>;
def : Pat<(i1 (trunc Int32Regs:$a)),
          (SETP_b32ri (ANDb32ri Int32Regs:$a, 1), 1, CmpEQ)>;

// truncate i16
def : Pat<(i1 (trunc Int16Regs:$a)),
          (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>;
2500
// sext_inreg: each (container width, inner width) pair maps onto the
// correspondingly named CVT_INREG instruction.
def : Pat<(sext_inreg Int16Regs:$a, i8),
          (CVT_INREG_s16_s8 Int16Regs:$a)>;
def : Pat<(sext_inreg Int32Regs:$a, i8),
          (CVT_INREG_s32_s8 Int32Regs:$a)>;
def : Pat<(sext_inreg Int32Regs:$a, i16),
          (CVT_INREG_s32_s16 Int32Regs:$a)>;
def : Pat<(sext_inreg Int64Regs:$a, i8),
          (CVT_INREG_s64_s8 Int64Regs:$a)>;
def : Pat<(sext_inreg Int64Regs:$a, i16),
          (CVT_INREG_s64_s16 Int64Regs:$a)>;
def : Pat<(sext_inreg Int64Regs:$a, i32),
          (CVT_INREG_s64_s32 Int64Regs:$a)>;
2508
2509
// Select instructions with 32-bit predicates.
// The 32-bit condition is first turned into an i1 predicate by masking bit 0
// and comparing the result with 1; that predicate then drives a
// register/register selp of the appropriate type.
def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
          (SELP_b16rr
            Int16Regs:$a, Int16Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
          (SELP_b32rr
            Int32Regs:$a, Int32Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
          (SELP_b64rr
            Int64Regs:$a, Int64Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
          (SELP_f32rr
            Float32Regs:$a, Float32Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
          (SELP_f64rr
            Float64Regs:$a, Float64Regs:$b,
            (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
2526
2527
// Pack a set of smaller registers into one larger register using the
// brace-list form of mov.  These instructions have no selection patterns.
def V4I16toI64 :
  NVPTXInst<(outs Int64Regs:$d),
            (ins Int16Regs:$s1, Int16Regs:$s2, Int16Regs:$s3, Int16Regs:$s4),
            "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
            []>;
def V2I16toI32 :
  NVPTXInst<(outs Int32Regs:$d),
            (ins Int16Regs:$s1, Int16Regs:$s2),
            "mov.b32\t$d, {{$s1, $s2}};",
            []>;
def V2I32toI64 :
  NVPTXInst<(outs Int64Regs:$d),
            (ins Int32Regs:$s1, Int32Regs:$s2),
            "mov.b64\t$d, {{$s1, $s2}};",
            []>;
def V2F32toF64 :
  NVPTXInst<(outs Float64Regs:$d),
            (ins Float32Regs:$s1, Float32Regs:$s2),
            "mov.b64\t$d, {{$s1, $s2}};",
            []>;
2546
// Unpack one larger register into a set of smaller registers using the
// brace-list form of mov.  These instructions have no selection patterns.
def I64toV4I16 :
  NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, Int16Regs:$d3, Int16Regs:$d4),
            (ins Int64Regs:$s),
            "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
            []>;
def I32toV2I16 :
  NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
            (ins Int32Regs:$s),
            "mov.b32\t{{$d1, $d2}}, $s;",
            []>;
def I64toV2I32 :
  NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
            (ins Int64Regs:$s),
            "mov.b64\t{{$d1, $d2}}, $s;",
            []>;
def F64toV2F32 :
  NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
            (ins Float64Regs:$s),
            "mov.b64\t{{$d1, $d2}}, $s;",
            []>;
2565
// Count leading zeros.
// Both instructions write a 32-bit result, including the 64-bit-input form.
def CLZr32 :
  NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
            "clz.b32\t$d, $a;",
            []>;
def CLZr64 :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "clz.b64\t$d, $a;",
            []>;

// 32-bit: direct mapping onto the PTX instruction.
def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>;
def : Pat<(ctlz_zero_undef Int32Regs:$a), (CLZr32 Int32Regs:$a)>;

// 64-bit: the PTX result is only 32 bits wide, so it is zero-extended to
// 64 bits to match the LLVM semantics.
def : Pat<(ctlz Int64Regs:$a),
          (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
def : Pat<(ctlz_zero_undef Int64Regs:$a),
          (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;

// 16-bit: zero-extend the input to 32 bits, count there, and truncate the
// count back to 16 bits (ctlz of a 16-bit value always fits in 16 bits).
// The 16 high-order zeros introduced by the extension were counted too, so
// 16 is subtracted from the result.
def : Pat<(ctlz Int16Regs:$a),
          (SUBi16ri
            (CVT_u16_u32
              (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
              CvtNONE),
            16)>;
def : Pat<(ctlz_zero_undef Int16Regs:$a),
          (SUBi16ri
            (CVT_u16_u32
              (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
              CvtNONE),
            16)>;
2599
2600// Population count
// POPCr32 and POPCr64 print "popc.b32" and "popc.b64" respectively.  Both
// write their result to an Int32Regs register; only the source register
// class differs (Int32Regs vs. Int64Regs).  Neither def carries a pattern
// of its own: ctpop selection is expressed by the anonymous Pat records in
// this file.
2601def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
2602                        "popc.b32\t$d, $a;",
2603                        []>;
2604def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
2605                        "popc.b64\t$d, $a;",
2606                        []>;
2607
// Selection patterns for ctpop, one per integer register class.
2608// 32-bit has a direct PTX instruction
2609def : Pat<(ctpop Int32Regs:$a),
2610          (POPCr32 Int32Regs:$a)>;
2611
2612// For 64-bit, the result in PTX is actually 32-bit so we zero-extend
2613// to 64-bit to match the LLVM semantics
// (POPCr64 defines an Int32Regs result; CVT_u64_u32 with CvtNONE widens it.)
2614def : Pat<(ctpop Int64Regs:$a),
2615          (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;
2616
2617// For 16-bit, we zero-extend to 32-bit, then trunc the result back
2618// to 16-bits (ctpop of a 16-bit value is guaranteed to require less
2619// than 16 bits to store)
// Unlike the 16-bit ctlz expansion, no correction is subtracted afterwards:
// the pattern is just widen -> POPCr32 -> narrow.
2620def : Pat<(ctpop Int16Regs:$a),
2621          (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
2622           CvtNONE)>;
2623
2624// fround f64 -> f32
// Two patterns match the same node.  The first is guarded by
// Requires<[doF32FTZ]> and passes the CvtRN_FTZ mode (0x15); the second is
// unguarded and passes CvtRN (0x5).  Both select CVT_f32_f64.
// NOTE(review): the guarded pattern is listed first; presumably it takes
// priority whenever doF32FTZ holds -- confirm how the selector orders them.
2625def : Pat<(f32 (fround Float64Regs:$a)),
2626          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
2627def : Pat<(f32 (fround Float64Regs:$a)),
2628          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
2629
2630// fextend f32 -> f64
// Two patterns match the same node.  The first is guarded by
// Requires<[doF32FTZ]> and passes the CvtNONE_FTZ mode (0x10); the second is
// unguarded and passes CvtNONE (0x0).  Both select CVT_f64_f32.
2631def : Pat<(f64 (fextend Float32Regs:$a)),
2632          (CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
2633def : Pat<(f64 (fextend Float32Regs:$a)),
2634          (CVT_f64_f32 Float32Regs:$a, CvtNONE)>;
2635
// retflag: SDNode bound to NVPTXISD::RET_FLAG.  It uses the SDTNone type
// profile and carries a chain plus an optional incoming glue.  It is the
// selection pattern of the Return instruction in the control-flow section.
2636def retflag       : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
2637                           [SDNPHasChain, SDNPOptInGlue]>;
2638
2639//-----------------------------------
2640// Control-flow
2641//-----------------------------------
2642
// All four instructions below are marked isTerminator.
//   Return       - prints "ret;"; selected from (retflag); also isReturn and
//                  isBarrier.
//   CBranch      - prints "@$a bra <target>;"; selected from brcond on an
//                  Int1Regs predicate; isBranch.
//   CBranchOther - prints "@!$a bra <target>;" (negated predicate).  It has
//                  no pattern of its own; a separate Pat in this file
//                  selects it for the inverted-condition brcond; isBranch.
//   GOTO         - prints "bra.uni <target>;"; selected from (br bb:$target);
//                  isBranch and isBarrier.
2643let isTerminator=1 in {
2644   let isReturn=1, isBarrier=1 in
2645      def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;
2646
2647   let isBranch=1 in
2648      def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2649                          "@$a bra \t$target;",
2650                           [(brcond Int1Regs:$a, bb:$target)]>;
2651   let isBranch=1 in
2652      def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2653                          "@!$a bra \t$target;",
2654                           []>;
2655
2656   let isBranch=1, isBarrier=1 in
2657      def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
2658                        "bra.uni \t$target;",
2659                  [(br bb:$target)]>;
2660}
2661
// brcond whose condition lives in an Int32Regs register: the condition is
// first compared against the immediate 0 with SETP_u32ri using CmpNE, and
// the result feeds CBranch as its predicate operand.
// NOTE(review): SETP_u32ri is defined outside this section; it presumably
// yields an Int1Regs predicate, which is what CBranch expects -- confirm.
2662def : Pat<(brcond Int32Regs:$a, bb:$target),
2663          (CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>;
2664
2665// SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
2666// conditional branch if
2667// the target block is the next block so that the code can fall through to the
2668// target block.
2669// The inversion is done by 'xor condition, 1', which will be translated to
2670// (setne condition, -1).
2671// Since ptx supports '@!pred bra target', we should use it.
// The pattern below matches that (setne $a, -1) form on an Int1Regs value
// and selects CBranchOther, which prints the negated-predicate branch
// "@!$a bra".
2672def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
2673  (CBranchOther Int1Regs:$a, bb:$target)>;
2674
2675// Call
// Type profiles for the call-sequence markers: the start profile constrains
// operand 0 to i32; the end profile constrains operands 0 and 1 to i32.
2676def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
2677def SDT_NVPTXCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
2678                                        SDTCisVT<1, i32> ]>;
2679
// SDNodes bound to ISD::CALLSEQ_START / ISD::CALLSEQ_END.  Both have a
// chain, produce glue and are flagged as having side effects; callseq_end
// additionally accepts an optional incoming glue.
2680def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
2681                           [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
2682def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_NVPTXCallSeqEnd,
2683                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
2684                           SDNPSideEffect]>;
2685
// SDT_NVPTXCall: no results, one operand, constrained to i32.
// call: SDNode bound to NVPTXISD::CALL; has a chain, an optional incoming
// glue and an outgoing glue.
// calltarget: i32 operand type used for the CALL destination.
// CALL: isCall instruction printing "call <dst>, (1);".  It has no pattern
// of its own; the Pat records for tglobaladdr / texternalsym in this file
// select it.
// NOTE(review): the "(1)" in the asm string is a hard-coded literal; its
// meaning is not evident from this file section -- confirm.
2686def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
2687def call          : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
2688                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
2689def calltarget : Operand<i32>;
2690let isCall=1 in {
2691   def CALL : NVPTXInst<(outs), (ins calltarget:$dst),
2692                  "call \t$dst, (1);", []>;
2693}
2694
// Select CALL for the call node when its target is either a target global
// address or a target external symbol; the target is passed through
// unchanged as CALL's $dst operand.
2695def : Pat<(call tglobaladdr:$dst),
2696          (CALL tglobaladdr:$dst)>;
2697def : Pat<(call texternalsym:$dst),
2698          (CALL texternalsym:$dst)>;
2699
2700// Pseudo instructions.
// Pseudo forwards its four parameters (outs, ins, asmstr, pattern) unchanged
// to NVPTXInst and adds nothing of its own.  Note that the Callseq_Start /
// Callseq_End defs that follow derive from NVPTXInst directly rather than
// from this class.
2701class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
2702   : NVPTXInst<outs, ins, asmstr, pattern>;
2703
2704// @TODO: We use some tricks here to emit curly braces.  Can we clean this up
2705// a bit without TableGen modifications?
// Callseq_Start is selected from (callseq_start timm:$amt).  Its asm text
// is a "// Callseq Start $amt" comment line, an opening brace, and a
// ".reg .b32 temp_param_reg;" declaration.
// Callseq_End is selected from (callseq_end timm:$amt1, timm:$amt2).  Its
// asm text closes the brace and adds a "// Callseq End $amt1" comment;
// $amt2 is an operand but is not referenced in the asm string.
// NOTE(review): each string also places its matching brace inside a PTX
// comment ("// <end>}}" and "//{{"), apparently so braces stay balanced
// within a single asm string -- confirm this is the "trick" the TODO means.
2706def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt),
2707  "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
2708                               [(callseq_start timm:$amt)]>;
2709def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
2710  "\n\t//{{\n\t}}// Callseq End $amt1",
2711                            [(callseq_end timm:$amt1, timm:$amt2)]>;
2712
2713// trap instruction
// trapinst has no operands, prints "trap;" and is selected directly from
// the (trap) node.
2714
2715def trapinst : NVPTXInst<(outs), (ins),
2716                         "trap;",
2717                         [(trap)]>;
2718
2719// Call prototype wrapper
// SDTCallPrototype: no results, one operand, constrained to an integer type.
// CallPrototype: SDNode bound to NVPTXISD::CallPrototype; has a chain, an
// incoming glue and an outgoing glue, and is flagged as having side effects.
// ProtoIdent: i32 operand printed through the "printProtoIdent" method.
// CALL_PROTOTYPE: the asm string is just "$ident", so the emitted text is
// whatever printProtoIdent produces for the operand; selected from
// CallPrototype applied to an i32 target external symbol.
2720def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
2721def CallPrototype
2722  : SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
2723           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
2724def ProtoIdent : Operand<i32> {
2725  let PrintMethod = "printProtoIdent";
2726}
2727def CALL_PROTOTYPE
2728  : NVPTXInst<(outs), (ins ProtoIdent:$ident),
2729              "$ident", [(CallPrototype (i32 texternalsym:$ident))]>;
2730
2731
2732
2733include "NVPTXIntrinsics.td"
2734
2735
2736//-----------------------------------
2737// Notes
2738//-----------------------------------
2739// BSWAP is currently expanded. The following would be more efficient:
2740// - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit registers
2741// - for sm_20, use prmt (use vector scalar mov to get the pack and
2742//   unpack). sm_20 supports native 32-bit registers, but not native 16-bit
2743// registers.
2744