• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the PTX instructions in TableGen format.
11//
12//===----------------------------------------------------------------------===//
13
14include "NVPTXInstrFormats.td"
15
16// A NOP instruction
17def NOP : NVPTXInst<(outs), (ins), "", []>;
18
19// List of vector specific properties
20def isVecLD      : VecInstTypeEnum<1>;
21def isVecST      : VecInstTypeEnum<2>;
22def isVecBuild   : VecInstTypeEnum<3>;
23def isVecShuffle : VecInstTypeEnum<4>;
24def isVecExtract : VecInstTypeEnum<5>;
25def isVecInsert  : VecInstTypeEnum<6>;
26def isVecDest    : VecInstTypeEnum<7>;
27def isVecOther   : VecInstTypeEnum<15>;
28
29//===----------------------------------------------------------------------===//
30// NVPTX Operand Definitions.
31//===----------------------------------------------------------------------===//
32
33def brtarget    : Operand<OtherVT>;
34
35//===----------------------------------------------------------------------===//
36// NVPTX Instruction Predicate Definitions
37//===----------------------------------------------------------------------===//
38
39
40def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
41def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
42def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
43def useAtomRedG32forGen32 :
44  Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
45def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
46def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
47def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
48def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
49def useAtomRedG64forGen64 :
50  Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
51def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
52def hasVote : Predicate<"Subtarget.hasVote()">;
53def hasDouble : Predicate<"Subtarget.hasDouble()">;
54def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
55def hasLDG : Predicate<"Subtarget.hasLDG()">;
56def hasLDU : Predicate<"Subtarget.hasLDU()">;
57def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
58
59def doF32FTZ : Predicate<"UseF32FTZ">;
60
61def doFMAF32      : Predicate<"doFMAF32">;
62def doFMAF32_ftz  : Predicate<"(doFMAF32 && UseF32FTZ)">;
63def doFMAF32AGG      : Predicate<"doFMAF32AGG">;
64def doFMAF32AGG_ftz  : Predicate<"(doFMAF32AGG && UseF32FTZ)">;
65def doFMAF64      : Predicate<"doFMAF64">;
66def doFMAF64AGG      : Predicate<"doFMAF64AGG">;
67def doFMADF32     : Predicate<"doFMADF32">;
68def doFMADF32_ftz : Predicate<"(doFMADF32 && UseF32FTZ)">;
69
70def doMulWide      : Predicate<"doMulWide">;
71
72def allowFMA : Predicate<"allowFMA">;
73def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
74
75def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
76def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
77
78def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
79
80def true : Predicate<"1">;
81
82//===----------------------------------------------------------------------===//
83// Special Handling for 8-bit Operands and Operations
84//
85// PTX supports 8-bit signed and unsigned types, but does not support 8-bit
86// operations (like add, shift, etc) except for ld/st/cvt. SASS does not have
87// 8-bit registers.
88//
89// PTX ld, st and cvt instructions permit source and destination data operands
90// to be wider than the instruction-type size, so that narrow values may be
91// loaded, stored, and converted using regular-width registers.
92//
93// So in PTX generation, we
94// - always use 16-bit registers in place in 8-bit registers.
95//   (8-bit variables should stay as 8-bit as they represent memory layout.)
96// - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values
97//   before operation
98//   . div
99//   . rem
100//   . neg (sign)
101//   . set, setp
102//   . shr
103//
104// We are patching the operations by inserting the cvt instructions in the
105// asm strings of the affected instructions.
106//
107// Since vector operations, except for ld/st, are eventually elementized. We
108// do not need to special-hand the vector 8-bit operations.
109//
110//
111//===----------------------------------------------------------------------===//
112
113// Generate string block like
114// {
115//   .reg .s16 %temp1;
116//   .reg .s16 %temp2;
117//   cvt.s16.s8 %temp1, %a;
118//   cvt.s16.s8 %temp2, %b;
119//   opc.s16    %dst, %temp1, %temp2;
120// }
121// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
122class Handle_i8rr<string OpcStr, string TypeStr, string CVTStr> {
123  string s = !strconcat("{{\n\t",
124             !strconcat(".reg .", !strconcat(TypeStr,
125             !strconcat(" \t%temp1;\n\t",
126             !strconcat(".reg .", !strconcat(TypeStr,
127             !strconcat(" \t%temp2;\n\t",
128             !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
129             !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
130             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))));
131}
132
133// Generate string block like
134// {
135//   .reg .s16 %temp1;
136//   .reg .s16 %temp2;
137//   cvt.s16.s8 %temp1, %a;
138//   mov.b16    %temp2, %b;
139//   cvt.s16.s8 %temp2, %temp2;
140//   opc.s16    %dst, %temp1, %temp2;
141// }
142// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
143class Handle_i8ri<string OpcStr, string TypeStr, string CVTStr> {
144  string s = !strconcat("{{\n\t",
145             !strconcat(".reg .", !strconcat(TypeStr,
146             !strconcat(" \t%temp1;\n\t",
147             !strconcat(".reg .",
148             !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t",
149             !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
150             !strconcat("mov.b16 \t%temp2, $b;\n\t",
151             !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t",
152             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
153}
154
155// Generate string block like
156// {
157//   .reg .s16 %temp1;
158//   .reg .s16 %temp2;
159//   mov.b16    %temp1, %b;
160//   cvt.s16.s8 %temp1, %temp1;
161//   cvt.s16.s8 %temp2, %a;
162//   opc.s16    %dst, %temp1, %temp2;
163// }
164// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
165class Handle_i8ir<string OpcStr, string TypeStr, string CVTStr> {
166  string s = !strconcat("{{\n\t",
167             !strconcat(".reg .", !strconcat(TypeStr,
168             !strconcat(" \t%temp1;\n\t",
169             !strconcat(".reg .", !strconcat(TypeStr,
170             !strconcat(" \t%temp2;\n\t",
171             !strconcat("mov.b16 \t%temp1, $a;\n\t",
172             !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t",
173             !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
174             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
175}
176
177
178//===----------------------------------------------------------------------===//
179// Some Common Instruction Class Templates
180//===----------------------------------------------------------------------===//
181
182multiclass I3<string OpcStr, SDNode OpNode> {
183  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
184                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
185                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
186                       Int64Regs:$b))]>;
187  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
188                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
189                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
190  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
191                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
192                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
193                       Int32Regs:$b))]>;
194  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
195                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
196                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
197  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
198                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
199                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
200                       Int16Regs:$b))]>;
201  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
202                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
203                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
204  def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
205                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
206                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
207  def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
208                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
209                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
210}
211
212multiclass I3_i8<string OpcStr, SDNode OpNode, string TypeStr, string CVTStr> {
213  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
214                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
215                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
216                       Int64Regs:$b))]>;
217  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
218                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
219                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
220  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
221                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
222                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
223                       Int32Regs:$b))]>;
224  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
225                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
226                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
227  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
228                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
229                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
230                       Int16Regs:$b))]>;
231  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
232                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
233                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
234  def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
235                     Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
236                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
237  def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
238                     Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
239                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
240}
241
242multiclass I3_noi8<string OpcStr, SDNode OpNode> {
243  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
244                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
245                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
246                       Int64Regs:$b))]>;
247  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
248                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
249                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
250  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
251                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
252                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
253                       Int32Regs:$b))]>;
254  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
255                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
256                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
257  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
258                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
259                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
260                       Int16Regs:$b))]>;
261  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
262                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
263                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
264}
265
266multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
267   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
268       Int32Regs:$b),
269                      !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
270                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
271                        Int32Regs:$b))]>;
272   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
273                      !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
274                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
275}
276
277multiclass F3<string OpcStr, SDNode OpNode> {
278   def f64rr : NVPTXInst<(outs Float64Regs:$dst),
279                      (ins Float64Regs:$a, Float64Regs:$b),
280                      !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
281                      [(set Float64Regs:$dst,
282                        (OpNode Float64Regs:$a, Float64Regs:$b))]>,
283                      Requires<[allowFMA]>;
284   def f64ri : NVPTXInst<(outs Float64Regs:$dst),
285                      (ins Float64Regs:$a, f64imm:$b),
286                      !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
287                      [(set Float64Regs:$dst,
288                        (OpNode Float64Regs:$a, fpimm:$b))]>,
289                      Requires<[allowFMA]>;
290   def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
291                      (ins Float32Regs:$a, Float32Regs:$b),
292                      !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
293                      [(set Float32Regs:$dst,
294                        (OpNode Float32Regs:$a, Float32Regs:$b))]>,
295                      Requires<[allowFMA_ftz]>;
296   def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
297                      (ins Float32Regs:$a, f32imm:$b),
298                      !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
299                      [(set Float32Regs:$dst,
300                        (OpNode Float32Regs:$a, fpimm:$b))]>,
301                      Requires<[allowFMA_ftz]>;
302   def f32rr : NVPTXInst<(outs Float32Regs:$dst),
303                      (ins Float32Regs:$a, Float32Regs:$b),
304                      !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
305                      [(set Float32Regs:$dst,
306                        (OpNode Float32Regs:$a, Float32Regs:$b))]>,
307                      Requires<[allowFMA]>;
308   def f32ri : NVPTXInst<(outs Float32Regs:$dst),
309                      (ins Float32Regs:$a, f32imm:$b),
310                      !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
311                      [(set Float32Regs:$dst,
312                        (OpNode Float32Regs:$a, fpimm:$b))]>,
313                      Requires<[allowFMA]>;
314}
315
316multiclass F3_rn<string OpcStr, SDNode OpNode> {
317   def f64rr : NVPTXInst<(outs Float64Regs:$dst),
318                      (ins Float64Regs:$a, Float64Regs:$b),
319                      !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
320                      [(set Float64Regs:$dst,
321                        (OpNode Float64Regs:$a, Float64Regs:$b))]>;
322   def f64ri : NVPTXInst<(outs Float64Regs:$dst),
323                      (ins Float64Regs:$a, f64imm:$b),
324                      !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
325                      [(set Float64Regs:$dst,
326                        (OpNode Float64Regs:$a, fpimm:$b))]>;
327   def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
328                      (ins Float32Regs:$a, Float32Regs:$b),
329                      !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
330                      [(set Float32Regs:$dst,
331                        (OpNode Float32Regs:$a, Float32Regs:$b))]>,
332                      Requires<[doF32FTZ]>;
333   def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
334                      (ins Float32Regs:$a, f32imm:$b),
335                      !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
336                      [(set Float32Regs:$dst,
337                        (OpNode Float32Regs:$a, fpimm:$b))]>,
338                      Requires<[doF32FTZ]>;
339   def f32rr : NVPTXInst<(outs Float32Regs:$dst),
340                      (ins Float32Regs:$a, Float32Regs:$b),
341                      !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
342                      [(set Float32Regs:$dst,
343                        (OpNode Float32Regs:$a, Float32Regs:$b))]>;
344   def f32ri : NVPTXInst<(outs Float32Regs:$dst),
345                      (ins Float32Regs:$a, f32imm:$b),
346                      !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
347                      [(set Float32Regs:$dst,
348                        (OpNode Float32Regs:$a, fpimm:$b))]>;
349}
350
351multiclass F2<string OpcStr, SDNode OpNode> {
352   def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
353                      !strconcat(OpcStr, ".f64 \t$dst, $a;"),
354                      [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
355   def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
356                      !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
357                      [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
358                      Requires<[doF32FTZ]>;
359   def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
360                      !strconcat(OpcStr, ".f32 \t$dst, $a;"),
361                      [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
362}
363
364//===----------------------------------------------------------------------===//
365// NVPTX Instructions.
366//===----------------------------------------------------------------------===//
367
368//-----------------------------------
369// Integer Arithmetic
370//-----------------------------------
371
372multiclass ADD_SUB_i1<SDNode OpNode> {
373   def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
374          "xor.pred \t$dst, $a, $b;",
375      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
376   def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
377          "xor.pred \t$dst, $a, $b;",
378      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
379}
380
381defm ADD_i1 : ADD_SUB_i1<add>;
382defm SUB_i1 : ADD_SUB_i1<sub>;
383
384
385defm ADD : I3<"add.s", add>;
386defm SUB : I3<"sub.s", sub>;
387
388defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
389defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
390
391defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
392defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
393
394//mul.wide PTX instruction
395def SInt32Const : PatLeaf<(imm), [{
396  const APInt &v = N->getAPIntValue();
397  if (v.isSignedIntN(32))
398    return true;
399  return false;
400}]>;
401
402def UInt32Const : PatLeaf<(imm), [{
403  const APInt &v = N->getAPIntValue();
404  if (v.isIntN(32))
405    return true;
406  return false;
407}]>;
408
409def SInt16Const : PatLeaf<(imm), [{
410  const APInt &v = N->getAPIntValue();
411  if (v.isSignedIntN(16))
412    return true;
413  return false;
414}]>;
415
416def UInt16Const : PatLeaf<(imm), [{
417  const APInt &v = N->getAPIntValue();
418  if (v.isIntN(16))
419    return true;
420  return false;
421}]>;
422
423def Int5Const : PatLeaf<(imm), [{
424  const APInt &v = N->getAPIntValue();
425  // Check if 0 <= v < 32
426  // Only then the result from (x << v) will be i32
427  if (v.sge(0) && v.slt(32))
428    return true;
429  return false;
430}]>;
431
432def Int4Const : PatLeaf<(imm), [{
433  const APInt &v = N->getAPIntValue();
434  // Check if 0 <= v < 16
435  // Only then the result from (x << v) will be i16
436  if (v.sge(0) && v.slt(16))
437    return true;
438  return false;
439}]>;
440
441def SHL2MUL32 : SDNodeXForm<imm, [{
442  const APInt &v = N->getAPIntValue();
443  APInt temp(32, 1);
444  return CurDAG->getTargetConstant(temp.shl(v), MVT::i32);
445}]>;
446
447def SHL2MUL16 : SDNodeXForm<imm, [{
448  const APInt &v = N->getAPIntValue();
449  APInt temp(16, 1);
450  return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
451}]>;
452
453def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst),
454                           (ins Int32Regs:$a, Int32Regs:$b),
455                           "mul.wide.s32 \t$dst, $a, $b;", []>;
456def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst),
457                            (ins Int32Regs:$a, i64imm:$b),
458                           "mul.wide.s32 \t$dst, $a, $b;", []>;
459
460def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst),
461                           (ins Int32Regs:$a, Int32Regs:$b),
462                           "mul.wide.u32 \t$dst, $a, $b;", []>;
463def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst),
464                            (ins Int32Regs:$a, i64imm:$b),
465                           "mul.wide.u32 \t$dst, $a, $b;", []>;
466
467def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst),
468                            (ins Int16Regs:$a, Int16Regs:$b),
469                           "mul.wide.s16 \t$dst, $a, $b;", []>;
470def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst),
471                            (ins Int16Regs:$a, i32imm:$b),
472                           "mul.wide.s16 \t$dst, $a, $b;", []>;
473
474def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst),
475                            (ins Int16Regs:$a, Int16Regs:$b),
476                           "mul.wide.u16 \t$dst, $a, $b;", []>;
477def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst),
478                            (ins Int16Regs:$a, i32imm:$b),
479                           "mul.wide.u16 \t$dst, $a, $b;", []>;
480
481def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
482          (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
483          Requires<[doMulWide]>;
484def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
485          (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
486          Requires<[doMulWide]>;
487
488def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
489          (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
490          Requires<[doMulWide]>;
491def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
492          (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
493          Requires<[doMulWide]>;
494
495def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
496          (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
497          Requires<[doMulWide]>;
498def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
499          (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>,
500          Requires<[doMulWide]>;
501
502def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
503          (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>;
504def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
505          (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>,
506          Requires<[doMulWide]>;
507
508def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
509          (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
510def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
511          (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>,
512          Requires<[doMulWide]>;
513
514def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
515          (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
516def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
517          (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>,
518          Requires<[doMulWide]>;
519
520defm MULT : I3<"mul.lo.s", mul>;
521
522defm MULTHS : I3_noi8<"mul.hi.s", mulhs>;
523defm MULTHU : I3_noi8<"mul.hi.u", mulhu>;
524def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
525            !strconcat("{{ \n\t",
526            !strconcat(".reg \t.s16 temp1; \n\t",
527            !strconcat(".reg \t.s16 temp2; \n\t",
528            !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
529            !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t",
530            !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
531            !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
532            !strconcat("}}", "")))))))),
533      [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>;
534def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
535            !strconcat("{{ \n\t",
536            !strconcat(".reg \t.s16 temp1; \n\t",
537            !strconcat(".reg \t.s16 temp2; \n\t",
538            !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
539            !strconcat("mov.b16 \ttemp2, $b; \n\t",
540            !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t",
541            !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
542            !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
543            !strconcat("}}", ""))))))))),
544      [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>;
545def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
546            !strconcat("{{ \n\t",
547            !strconcat(".reg \t.u16 temp1; \n\t",
548            !strconcat(".reg \t.u16 temp2; \n\t",
549            !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
550            !strconcat("cvt.u16.u8 \ttemp2, $b; \n\t",
551            !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
552            !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
553            !strconcat("}}", "")))))))),
554      [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>;
555def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
556            !strconcat("{{ \n\t",
557            !strconcat(".reg \t.u16 temp1; \n\t",
558            !strconcat(".reg \t.u16 temp2; \n\t",
559            !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
560            !strconcat("mov.b16 \ttemp2, $b; \n\t",
561            !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t",
562            !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
563            !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
564            !strconcat("}}", ""))))))))),
565      [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>;
566
567
568defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">;
569defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">;
570
571defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">;
572// The ri version will not be selected as DAGCombiner::visitSREM will lower it.
573defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">;
574// The ri version will not be selected as DAGCombiner::visitUREM will lower it.
575
576def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst),
577                      (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c),
578                      "mad.lo.s16 \t$dst, $a, $b, $c;",
579                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
580                        Int8Regs:$c))]>;
581def MAD8rri : NVPTXInst<(outs Int8Regs:$dst),
582                      (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c),
583                      "mad.lo.s16 \t$dst, $a, $b, $c;",
584                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
585                        imm:$c))]>;
586def MAD8rir : NVPTXInst<(outs Int8Regs:$dst),
587                      (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c),
588                      "mad.lo.s16 \t$dst, $a, $b, $c;",
589                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
590                        Int8Regs:$c))]>;
591def MAD8rii : NVPTXInst<(outs Int8Regs:$dst),
592                      (ins Int8Regs:$a, i8imm:$b, i8imm:$c),
593                      "mad.lo.s16 \t$dst, $a, $b, $c;",
594                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
595                        imm:$c))]>;
596
597def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
598                      (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
599                      "mad.lo.s16 \t$dst, $a, $b, $c;",
600                      [(set Int16Regs:$dst, (add
601                        (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>;
602def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
603                      (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
604                      "mad.lo.s16 \t$dst, $a, $b, $c;",
605                      [(set Int16Regs:$dst, (add
606                        (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>;
607def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
608                      (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
609                      "mad.lo.s16 \t$dst, $a, $b, $c;",
610                      [(set Int16Regs:$dst, (add
611                        (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>;
612def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
613    (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
614                      "mad.lo.s16 \t$dst, $a, $b, $c;",
615                      [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b),
616                        imm:$c))]>;
617
618def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
619                      (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
620                      "mad.lo.s32 \t$dst, $a, $b, $c;",
621                      [(set Int32Regs:$dst, (add
622                        (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>;
623def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
624                      (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
625                      "mad.lo.s32 \t$dst, $a, $b, $c;",
626                      [(set Int32Regs:$dst, (add
627                        (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>;
628def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
629                      (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
630                      "mad.lo.s32 \t$dst, $a, $b, $c;",
631                      [(set Int32Regs:$dst, (add
632                        (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>;
633def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
634                      (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
635                      "mad.lo.s32 \t$dst, $a, $b, $c;",
636                      [(set Int32Regs:$dst, (add
637                        (mul Int32Regs:$a, imm:$b), imm:$c))]>;
638
639def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
640                      (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
641                      "mad.lo.s64 \t$dst, $a, $b, $c;",
642                      [(set Int64Regs:$dst, (add
643                        (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>;
644def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
645                      (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
646                      "mad.lo.s64 \t$dst, $a, $b, $c;",
647                      [(set Int64Regs:$dst, (add
648                        (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>;
649def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
650                      (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
651                      "mad.lo.s64 \t$dst, $a, $b, $c;",
652                      [(set Int64Regs:$dst, (add
653                        (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>;
654def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
655                      (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
656                      "mad.lo.s64 \t$dst, $a, $b, $c;",
657                      [(set Int64Regs:$dst, (add
658                        (mul Int64Regs:$a, imm:$b), imm:$c))]>;
659
660
661def INEG8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
662                     !strconcat("cvt.s16.s8 \t$dst, $src;\n\t",
663                                 "neg.s16 \t$dst, $dst;"),
664         [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>;
665def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
666                     "neg.s16 \t$dst, $src;",
667         [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
668def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
669                     "neg.s32 \t$dst, $src;",
670         [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
671def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
672                     "neg.s64 \t$dst, $src;",
673         [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
674
675//-----------------------------------
676// Floating Point Arithmetic
677//-----------------------------------
678
679// Constant 1.0f
680def FloatConst1 : PatLeaf<(fpimm), [{
681    if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
682      return false;
683    float f = (float)N->getValueAPF().convertToFloat();
684    return (f==1.0f);
685}]>;
686// Constand (double)1.0
687def DoubleConst1 : PatLeaf<(fpimm), [{
688    if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
689      return false;
690    double d = (double)N->getValueAPF().convertToDouble();
691    return (d==1.0);
692}]>;
693
694defm FADD : F3<"add", fadd>;
695defm FSUB : F3<"sub", fsub>;
696defm FMUL : F3<"mul", fmul>;
697
698defm FADD_rn : F3_rn<"add", fadd>;
699defm FSUB_rn : F3_rn<"sub", fsub>;
700defm FMUL_rn : F3_rn<"mul", fmul>;
701
702defm FABS : F2<"abs", fabs>;
703defm FNEG : F2<"neg", fneg>;
704defm FSQRT : F2<"sqrt.rn", fsqrt>;
705
706//
707// F64 division
708//
709def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
710                      (ins f64imm:$a, Float64Regs:$b),
711                      "rcp.rn.f64 \t$dst, $b;",
712                      [(set Float64Regs:$dst,
713                        (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
714def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
715                      (ins Float64Regs:$a, Float64Regs:$b),
716                      "div.rn.f64 \t$dst, $a, $b;",
717                      [(set Float64Regs:$dst,
718                        (fdiv Float64Regs:$a, Float64Regs:$b))]>;
719def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
720                      (ins Float64Regs:$a, f64imm:$b),
721                      "div.rn.f64 \t$dst, $a, $b;",
722                      [(set Float64Regs:$dst,
723                        (fdiv Float64Regs:$a, fpimm:$b))]>;
724
725//
726// F32 Approximate reciprocal
727//
728def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
729                      (ins f32imm:$a, Float32Regs:$b),
730                      "rcp.approx.ftz.f32 \t$dst, $b;",
731                      [(set Float32Regs:$dst,
732                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
733                      Requires<[do_DIVF32_APPROX, doF32FTZ]>;
734def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
735                        (ins f32imm:$a, Float32Regs:$b),
736                       "rcp.approx.f32 \t$dst, $b;",
737                      [(set Float32Regs:$dst,
738                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
739                      Requires<[do_DIVF32_APPROX]>;
740//
741// F32 Approximate division
742//
743def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
744                      (ins Float32Regs:$a, Float32Regs:$b),
745                      "div.approx.ftz.f32 \t$dst, $a, $b;",
746                      [(set Float32Regs:$dst,
747                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
748                      Requires<[do_DIVF32_APPROX, doF32FTZ]>;
749def FDIV32approxrr     : NVPTXInst<(outs Float32Regs:$dst),
750                      (ins Float32Regs:$a, Float32Regs:$b),
751                      "div.approx.f32 \t$dst, $a, $b;",
752                      [(set Float32Regs:$dst,
753                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
754                      Requires<[do_DIVF32_APPROX]>;
755//
756// F32 Semi-accurate reciprocal
757//
758// rcp.approx gives the same result as div.full(1.0f, a) and is faster.
759//
760def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
761                      (ins f32imm:$a, Float32Regs:$b),
762                      "rcp.approx.ftz.f32 \t$dst, $b;",
763                      [(set Float32Regs:$dst,
764                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
765                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
766def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
767                      (ins f32imm:$a, Float32Regs:$b),
768                      "rcp.approx.f32 \t$dst, $b;",
769                      [(set Float32Regs:$dst,
770                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
771                      Requires<[do_DIVF32_FULL]>;
772//
773// F32 Semi-accurate division
774//
775def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
776                      (ins Float32Regs:$a, Float32Regs:$b),
777                      "div.full.ftz.f32 \t$dst, $a, $b;",
778                      [(set Float32Regs:$dst,
779                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
780                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
781def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
782                      (ins Float32Regs:$a, f32imm:$b),
783                      "div.full.ftz.f32 \t$dst, $a, $b;",
784                      [(set Float32Regs:$dst,
785                        (fdiv Float32Regs:$a, fpimm:$b))]>,
786                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
787def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
788                      (ins Float32Regs:$a, Float32Regs:$b),
789                      "div.full.f32 \t$dst, $a, $b;",
790                      [(set Float32Regs:$dst,
791                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
792                      Requires<[do_DIVF32_FULL]>;
793def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
794                      (ins Float32Regs:$a, f32imm:$b),
795                      "div.full.f32 \t$dst, $a, $b;",
796                      [(set Float32Regs:$dst,
797                        (fdiv Float32Regs:$a, fpimm:$b))]>,
798                      Requires<[do_DIVF32_FULL]>;
799//
800// F32 Accurate reciprocal
801//
802def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
803                        (ins f32imm:$a, Float32Regs:$b),
804                       "rcp.rn.ftz.f32 \t$dst, $b;",
805                      [(set Float32Regs:$dst,
806                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
807                      Requires<[reqPTX20, doF32FTZ]>;
808def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
809                      (ins f32imm:$a, Float32Regs:$b),
810                       "rcp.rn.f32 \t$dst, $b;",
811                      [(set Float32Regs:$dst,
812                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
813                      Requires<[reqPTX20]>;
814//
815// F32 Accurate division
816//
817def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
818                      (ins Float32Regs:$a, Float32Regs:$b),
819                      "div.rn.ftz.f32 \t$dst, $a, $b;",
820                      [(set Float32Regs:$dst,
821                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
822                      Requires<[doF32FTZ, reqPTX20]>;
823def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
824                      (ins Float32Regs:$a, f32imm:$b),
825                      "div.rn.ftz.f32 \t$dst, $a, $b;",
826                      [(set Float32Regs:$dst,
827                        (fdiv Float32Regs:$a, fpimm:$b))]>,
828                      Requires<[doF32FTZ, reqPTX20]>;
829def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
830                      (ins Float32Regs:$a, Float32Regs:$b),
831                      "div.rn.f32 \t$dst, $a, $b;",
832                      [(set Float32Regs:$dst,
833                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
834                      Requires<[reqPTX20]>;
835def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
836                      (ins Float32Regs:$a, f32imm:$b),
837                      "div.rn.f32 \t$dst, $a, $b;",
838                      [(set Float32Regs:$dst,
839                        (fdiv Float32Regs:$a, fpimm:$b))]>,
840                      Requires<[reqPTX20]>;
841
842
843multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
844   def rrr : NVPTXInst<(outs Float32Regs:$dst),
845                      (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
846                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
847                      [(set Float32Regs:$dst, (fadd
848                        (fmul Float32Regs:$a, Float32Regs:$b),
849                        Float32Regs:$c))]>, Requires<[Pred]>;
850   // This is to WAR a weird bug in Tablegen that does not automatically
851   // generate the following permutated rule rrr2 from the above rrr.
852   // So we explicitly add it here. This happens to FMA32 only.
853   // See the comments at FMAD32 and FMA32 for more information.
854   def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
855                        (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
856                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
857                      [(set Float32Regs:$dst, (fadd Float32Regs:$c,
858                        (fmul Float32Regs:$a, Float32Regs:$b)))]>,
859                      Requires<[Pred]>;
860   def rri : NVPTXInst<(outs Float32Regs:$dst),
861                      (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
862                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
863                      [(set Float32Regs:$dst, (fadd
864                        (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>,
865                      Requires<[Pred]>;
866   def rir : NVPTXInst<(outs Float32Regs:$dst),
867                      (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
868                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
869                      [(set Float32Regs:$dst, (fadd
870                        (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>,
871                      Requires<[Pred]>;
872   def rii : NVPTXInst<(outs Float32Regs:$dst),
873                      (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
874                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
875                      [(set Float32Regs:$dst, (fadd
876                        (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>,
877                      Requires<[Pred]>;
878}
879
880multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
881   def rrr : NVPTXInst<(outs Float64Regs:$dst),
882                      (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
883                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
884                      [(set Float64Regs:$dst, (fadd
885                        (fmul Float64Regs:$a, Float64Regs:$b),
886                        Float64Regs:$c))]>, Requires<[Pred]>;
887   def rri : NVPTXInst<(outs Float64Regs:$dst),
888                      (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
889                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
890                      [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a,
891                        Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>;
892   def rir : NVPTXInst<(outs Float64Regs:$dst),
893                      (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
894                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
895                      [(set Float64Regs:$dst, (fadd
896                        (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>,
897                      Requires<[Pred]>;
898   def rii : NVPTXInst<(outs Float64Regs:$dst),
899                      (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
900                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
901                      [(set Float64Regs:$dst, (fadd
902                        (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>,
903                      Requires<[Pred]>;
904}
905
906// Due to a unknown reason (most likely a bug in tablegen), tablegen does not
907// automatically generate the rrr2 rule from
908// the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32.
909// If we reverse the order of the following two lines, then rrr2 rule will be
910// generated for FMA32, but not for rrr.
911// Therefore, we manually write the rrr2 rule in FPCONTRACT32.
912defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>;
913defm FMAD32 : FPCONTRACT32<"mad.f32", doFMADF32>;
914defm FMA32_ftz  : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
915defm FMA32  : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
916defm FMA64  : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
917
918// b*c-a => fmad(b, c, -a)
919multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
920  def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
921          (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
922          Requires<[Pred]>;
923}
924
925// a-b*c => fmad(-b,c, a)
926// - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c
927// b*c-a => fmad(b, c, -a)
928// - legal because b*c-a <=> b*c+(-a)
929multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
930  def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)),
931          (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>,
932          Requires<[Pred]>;
933  def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
934          (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
935          Requires<[Pred]>;
936}
937
938// a-b*c => fmad(-b,c, a)
939// b*c-a => fmad(b, c, -a)
940multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
941  def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)),
942          (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>,
943          Requires<[Pred]>;
944
945  def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a),
946          (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>,
947          Requires<[Pred]>;
948}
949
950defm FMAF32ext_ftz  : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
951defm FMAF32ext  : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
952defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD<FMAD32_ftzrrr, doFMADF32_ftz>;
953defm FMADF32ext : FPCONTRACT32_SUB_PAT_MAD<FMAD32rrr, doFMADF32>;
954defm FMAF64ext  : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
955
956def SINF:  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
957                      "sin.approx.f32 \t$dst, $src;",
958                      [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
959def COSF:  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
960                      "cos.approx.f32 \t$dst, $src;",
961                      [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
962
963//-----------------------------------
964// Logical Arithmetic
965//-----------------------------------
966
967multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
968  def b1rr:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
969                      !strconcat(OpcStr, ".pred  \t$dst, $a, $b;"),
970                      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
971  def b1ri:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
972                      !strconcat(OpcStr, ".pred  \t$dst, $a, $b;"),
973                      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
974  def b8rr:  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
975                      !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
976                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
977  def b8ri:  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
978                      !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
979                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
980  def b16rr:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
981                      !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
982                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
983                        Int16Regs:$b))]>;
984  def b16ri:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
985                      !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
986                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
987  def b32rr:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
988                      !strconcat(OpcStr, ".b32  \t$dst, $a, $b;"),
989                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
990                        Int32Regs:$b))]>;
991  def b32ri:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
992                      !strconcat(OpcStr, ".b32  \t$dst, $a, $b;"),
993                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
994  def b64rr:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
995                      !strconcat(OpcStr, ".b64  \t$dst, $a, $b;"),
996                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
997                        Int64Regs:$b))]>;
998  def b64ri:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
999                      !strconcat(OpcStr, ".b64  \t$dst, $a, $b;"),
1000                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
1001}
1002
1003defm OR  : LOG_FORMAT<"or", or>;
1004defm AND : LOG_FORMAT<"and", and>;
1005defm XOR : LOG_FORMAT<"xor", xor>;
1006
1007def NOT1:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
1008                      "not.pred \t$dst, $src;",
1009                      [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
1010def NOT8:  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
1011                      "not.b16 \t$dst, $src;",
1012                      [(set Int8Regs:$dst, (not Int8Regs:$src))]>;
1013def NOT16:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
1014                      "not.b16 \t$dst, $src;",
1015                      [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
1016def NOT32:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
1017                      "not.b32 \t$dst, $src;",
1018                      [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
1019def NOT64:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
1020                      "not.b64 \t$dst, $src;",
1021                      [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
1022
1023// For shifts, the second src operand must be 32-bit value
1024multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
1025   def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1026                      Int32Regs:$b),
1027                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1028                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1029                        Int32Regs:$b))]>;
1030   def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1031                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1032                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1033                        (i32 imm:$b)))]>;
1034   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1035                      Int32Regs:$b),
1036                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1037                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1038                        Int32Regs:$b))]>;
1039   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1040                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1041                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1042                        (i32 imm:$b)))]>;
1043   def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1044                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1045                      [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1046                        (i32 imm:$b)))]>;
1047   def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1048                      Int32Regs:$b),
1049                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1050                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1051                        Int32Regs:$b))]>;
1052   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1053                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1054                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1055                        (i32 imm:$b)))]>;
1056   def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
1057                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1058                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
1059                        Int32Regs:$b))]>;
1060   def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
1061                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1062                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
1063                        (i32 imm:$b)))]>;
1064}
1065
1066defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
1067
1068// For shifts, the second src operand must be 32-bit value
1069// Need to add cvt for the 8-bits.
1070multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode, string CVTStr> {
1071   def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1072                      Int32Regs:$b),
1073                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1074                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1075                        Int32Regs:$b))]>;
1076   def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1077                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1078                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1079                        (i32 imm:$b)))]>;
1080   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1081                      Int32Regs:$b),
1082                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1083                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1084                        Int32Regs:$b))]>;
1085   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1086                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1087                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1088                        (i32 imm:$b)))]>;
1089   def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1090                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1091                      [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1092                        (i32 imm:$b)))]>;
1093   def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1094                      Int32Regs:$b),
1095                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1096                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1097                        Int32Regs:$b))]>;
1098   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1099                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1100                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1101                        (i32 imm:$b)))]>;
1102   def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
1103                      !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
1104                      !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
1105                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
1106                        Int32Regs:$b))]>;
1107   def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
1108                      !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
1109                      !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
1110                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
1111                        (i32 imm:$b)))]>;
1112}
1113
1114defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">;
1115defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">;
1116
1117// 32bit
1118def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
1119  (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
1120    !strconcat("{{\n\t",
1121    !strconcat(".reg .b32 %lhs;\n\t",
1122    !strconcat(".reg .b32 %rhs;\n\t",
1123    !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t",
1124    !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t",
1125    !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1126    !strconcat("}}", ""))))))),
1127    []>;
1128
1129def SUB_FRM_32 : SDNodeXForm<imm, [{
1130    return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32);
1131}]>;
1132
1133def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
1134          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;
1135def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
1136          (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;
1137
1138def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1139    Int32Regs:$amt),
1140    !strconcat("{{\n\t",
1141    !strconcat(".reg .b32 %lhs;\n\t",
1142    !strconcat(".reg .b32 %rhs;\n\t",
1143    !strconcat(".reg .b32 %amt2;\n\t",
1144    !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
1145    !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1146    !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
1147    !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1148    !strconcat("}}", ""))))))))),
1149    [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>;
1150
1151def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1152    Int32Regs:$amt),
1153    !strconcat("{{\n\t",
1154    !strconcat(".reg .b32 %lhs;\n\t",
1155    !strconcat(".reg .b32 %rhs;\n\t",
1156    !strconcat(".reg .b32 %amt2;\n\t",
1157    !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
1158    !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1159    !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
1160    !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1161    !strconcat("}}", ""))))))))),
1162    [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>;
1163
1164// 64bit
1165def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1166    i32imm:$amt1, i32imm:$amt2),
1167    !strconcat("{{\n\t",
1168    !strconcat(".reg .b64 %lhs;\n\t",
1169    !strconcat(".reg .b64 %rhs;\n\t",
1170    !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t",
1171    !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t",
1172    !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1173    !strconcat("}}", ""))))))),
1174    []>;
1175
1176def SUB_FRM_64 : SDNodeXForm<imm, [{
1177    return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32);
1178}]>;
1179
1180def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
1181          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
1182def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
1183          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
1184
1185def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1186    Int32Regs:$amt),
1187    !strconcat("{{\n\t",
1188    !strconcat(".reg .b64 %lhs;\n\t",
1189    !strconcat(".reg .b64 %rhs;\n\t",
1190    !strconcat(".reg .u32 %amt2;\n\t",
1191    !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t",
1192    !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
1193    !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t",
1194    !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1195    !strconcat("}}", ""))))))))),
1196    [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
1197
1198def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1199    Int32Regs:$amt),
1200    !strconcat("{{\n\t",
1201    !strconcat(".reg .b64 %lhs;\n\t",
1202    !strconcat(".reg .b64 %rhs;\n\t",
1203    !strconcat(".reg .u32 %amt2;\n\t",
1204    !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t",
1205    !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
1206    !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
1207    !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1208    !strconcat("}}", ""))))))))),
1209    [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
1210
1211
1212//-----------------------------------
1213// Data Movement (Load / Store, Move)
1214//-----------------------------------
1215
1216def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
1217  [SDNPWantRoot]>;
1218def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
1219  [SDNPWantRoot]>;
1220
1221def MEMri : Operand<i32> {
1222  let PrintMethod = "printMemOperand";
1223  let MIOperandInfo = (ops Int32Regs, i32imm);
1224}
1225def MEMri64 : Operand<i64> {
1226  let PrintMethod = "printMemOperand";
1227  let MIOperandInfo = (ops Int64Regs, i64imm);
1228}
1229
1230def imem : Operand<iPTR> {
1231    let PrintMethod = "printOperand";
1232}
1233
1234def imemAny : Operand<iPTRAny> {
1235    let PrintMethod = "printOperand";
1236}
1237
1238def LdStCode : Operand<i32> {
1239    let PrintMethod = "printLdStCode";
1240}
1241
1242def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
1243def Wrapper    : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
1244
1245def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
1246                     "mov.u32 \t$dst, $a;",
1247                     [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
1248
1249def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
1250                     "mov.u64 \t$dst, $a;",
1251                     [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
1252
1253// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
1254let IsSimpleMove=1 in {
1255def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
1256                   "mov.pred \t$dst, $sss;", []>;
1257def IMOV8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss),
1258                    "mov.u16 \t$dst, $sss;", []>;
1259def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
1260                    "mov.u16 \t$dst, $sss;", []>;
1261def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
1262                    "mov.u32 \t$dst, $sss;", []>;
1263def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
1264                    "mov.u64 \t$dst, $sss;", []>;
1265
1266def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
1267                    "mov.f32 \t$dst, $src;", []>;
1268def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
1269                    "mov.f64 \t$dst, $src;", []>;
1270}
1271def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
1272                    "mov.pred \t$dst, $src;",
1273          [(set Int1Regs:$dst, imm:$src)]>;
1274def IMOV8ri: NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src),
1275                    "mov.u16 \t$dst, $src;",
1276          [(set Int8Regs:$dst, imm:$src)]>;
1277def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
1278                    "mov.u16 \t$dst, $src;",
1279          [(set Int16Regs:$dst, imm:$src)]>;
1280def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
1281                    "mov.u32 \t$dst, $src;",
1282          [(set Int32Regs:$dst, imm:$src)]>;
1283def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
1284                    "mov.u64 \t$dst, $src;",
1285          [(set Int64Regs:$dst, imm:$src)]>;
1286
1287def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
1288                    "mov.f32 \t$dst, $src;",
1289          [(set Float32Regs:$dst, fpimm:$src)]>;
1290def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
1291                    "mov.f64 \t$dst, $src;",
1292          [(set Float64Regs:$dst, fpimm:$src)]>;
1293
1294def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
1295
1296//---- Copy Frame Index ----
1297def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
1298                        "add.u32 \t$dst, ${addr:add};",
1299                        [(set Int32Regs:$dst, ADDRri:$addr)]>;
1300def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
1301                        "add.u64 \t$dst, ${addr:add};",
1302                        [(set Int64Regs:$dst, ADDRri64:$addr)]>;
1303
1304//-----------------------------------
1305// Comparison and Selection
1306//-----------------------------------
1307
1308// Generate string block like
1309// {
1310//   .reg .pred p;
1311//   setp.gt.s16 p, %a, %b;
1312//   selp.s16 %dst, -1, 0, p;
1313// }
1314// when OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b
1315class Set_Str<string OpcStr, string sz1, string sz2, string d, string a,
1316  string b> {
1317  string t1  = "{{\n\t.reg .pred p;\n\t";
1318  string t2  = !strconcat(t1 , OpcStr);
1319  string t3  = !strconcat(t2 , sz1);
1320  string t4  = !strconcat(t3 , " \tp, ");
1321  string t5  = !strconcat(t4 , a);
1322  string t6  = !strconcat(t5 , ", ");
1323  string t7  = !strconcat(t6 , b);
1324  string t8  = !strconcat(t7 , ";\n\tselp.s");
1325  string t9  = !strconcat(t8 , sz2);
1326  string t10 = !strconcat(t9, " \t");
1327  string t11 = !strconcat(t10, d);
1328  string s   = !strconcat(t11, ", -1, 0, p;\n\t}}");
1329}
1330
1331// Generate string block like
1332// {
1333//   .reg .pred p;
1334//   .reg .s16 %temp1;
1335//   .reg .s16 %temp2;
1336//   cvt.s16.s8 %temp1, %a;
1337//   cvt s16.s8 %temp1, %b;
1338//   setp.gt.s16 p, %temp1, %temp2;
1339//   selp.s16 %dst, -1, 0, p;
1340// }
1341// when OpcStr=setp.gt.s d=%dst a=%a b=%b type=s16 cvt=cvt.s16.s8
1342class Set_Stri8<string OpcStr, string d, string a, string b, string type,
1343  string cvt> {
1344  string t1  = "{{\n\t.reg .pred p;\n\t";
1345  string t2  = !strconcat(t1, ".reg .");
1346  string t3  = !strconcat(t2, type);
1347  string t4  = !strconcat(t3, " %temp1;\n\t");
1348  string t5  = !strconcat(t4, ".reg .");
1349  string t6  = !strconcat(t5, type);
1350  string t7  = !strconcat(t6, " %temp2;\n\t");
1351  string t8  = !strconcat(t7, cvt);
1352  string t9  = !strconcat(t8, " \t%temp1, ");
1353  string t10 = !strconcat(t9, a);
1354  string t11 = !strconcat(t10, ";\n\t");
1355  string t12 = !strconcat(t11, cvt);
1356  string t13 = !strconcat(t12, " \t%temp2, ");
1357  string t14 = !strconcat(t13, b);
1358  string t15 = !strconcat(t14, ";\n\t");
1359  string t16 = !strconcat(t15, OpcStr);
1360  string t17 = !strconcat(t16, "16");
1361  string t18 = !strconcat(t17, " \tp, %temp1, %temp2;\n\t");
1362  string t19 = !strconcat(t18, "selp.s16 \t");
1363  string t20 = !strconcat(t19, d);
1364  string s   = !strconcat(t20, ", -1, 0, p;\n\t}}");
1365}
1366
1367multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
1368  string TypeStr, string CVTStr> {
1369  def i8rr_toi8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
1370                     Set_Stri8<OpcStr, "$dst", "$a", "$b", TypeStr, CVTStr>.s,
1371               []>;
1372  def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1373      Int16Regs:$b),
1374                     Set_Str<OpcStr, "16", "16", "$dst", "$a", "$b">.s,
1375               []>;
1376  def i32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1377      Int32Regs:$b),
1378                     Set_Str<OpcStr, "32", "32", "$dst", "$a", "$b">.s,
1379               []>;
1380  def i64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1381      Int64Regs:$b),
1382                     Set_Str<OpcStr, "64", "64", "$dst", "$a", "$b">.s,
1383               []>;
1384
1385  def i8rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
1386                     Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
1387               [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
1388  def i8ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
1389                     Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
1390               [(set Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
1391  def i8ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
1392                     Handle_i8ir<OpcStr, TypeStr, CVTStr>.s,
1393               [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
1394  def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
1395                 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1396               [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
1397  def i16ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
1398                 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1399               [(set Int1Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
1400  def i16ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
1401                 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1402               [(set Int1Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
1403  def i32rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
1404                 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1405               [(set Int1Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
1406  def i32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1407                 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1408               [(set Int1Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
1409  def i32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
1410                 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1411               [(set Int1Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
1412  def i64rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
1413                 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1414               [(set Int1Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
1415  def i64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
1416                 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1417               [(set Int1Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
1418  def i64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
1419                 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1420               [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
1421
1422  def i8rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
1423                     Handle_i8rr<OpcStr_u32, TypeStr, CVTStr>.s,
1424               [(set Int32Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
1425  def i8ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
1426                     Handle_i8ri<OpcStr_u32, TypeStr, CVTStr>.s,
1427               [(set Int32Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
1428  def i8ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
1429                     Handle_i8ir<OpcStr_u32, TypeStr, CVTStr>.s,
1430               [(set Int32Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
1431  def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a,
1432      Int16Regs:$b),
1433                 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1434               [(set Int32Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
1435  def i16ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
1436                 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1437               [(set Int32Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
1438  def i16ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
1439                 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
1440               [(set Int32Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
1441  def i32rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1442      Int32Regs:$b),
1443                 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1444               [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
1445  def i32ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1446                 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1447               [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
1448  def i32ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
1449                 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
1450               [(set Int32Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
1451  def i64rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a,
1452      Int64Regs:$b),
1453                 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1454               [(set Int32Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
1455  def i64ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
1456                 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1457               [(set Int32Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
1458  def i64ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
1459                 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
1460               [(set Int32Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
1461}
1462
1463multiclass FSET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode> {
1464  def f32rr_toi32_ftz: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
1465      Float32Regs:$b),
1466                     Set_Str<OpcStr, "ftz.f32", "32", "$dst", "$a", "$b">.s,
1467               []>, Requires<[doF32FTZ]>;
1468  def f32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
1469      Float32Regs:$b),
1470                     Set_Str<OpcStr, "f32", "32", "$dst", "$a", "$b">.s,
1471               []>;
1472  def f64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Float64Regs:$a,
1473      Float64Regs:$b),
1474                     Set_Str<OpcStr, "f64", "64", "$dst", "$a", "$b">.s,
1475               []>;
1476  def f64rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a,
1477      Float64Regs:$b),
1478                     Set_Str<OpcStr, "f64", "32", "$dst", "$a", "$b">.s,
1479               []>;
1480
1481  def f32rr_p_ftz: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a
1482      , Float32Regs:$b),
1483                 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1484               [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>
1485  , Requires<[doF32FTZ]>;
1486  def f32rr_p: NVPTXInst<(outs Int1Regs:$dst),
1487    (ins Float32Regs:$a, Float32Regs:$b),
1488                 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1489               [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
1490  def f32ri_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
1491    (ins Float32Regs:$a, f32imm:$b),
1492                 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1493               [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
1494  Requires<[doF32FTZ]>;
1495  def f32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
1496                 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1497               [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
1498  def f32ir_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
1499    (ins f32imm:$a, Float32Regs:$b),
1500                 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
1501               [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>,
1502  Requires<[doF32FTZ]>;
1503  def f32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
1504                 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
1505               [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
1506  def f64rr_p: NVPTXInst<(outs Int1Regs:$dst),
1507    (ins Float64Regs:$a, Float64Regs:$b),
1508                 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1509               [(set Int1Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
1510  def f64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, f64imm:$b),
1511                 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1512               [(set Int1Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
1513  def f64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f64imm:$a, Float64Regs:$b),
1514                 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
1515               [(set Int1Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
1516
1517  def f32rr_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1518    (ins Float32Regs:$a, Float32Regs:$b),
1519                 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1520               [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
1521  def f32rr_u32: NVPTXInst<(outs Int32Regs:$dst),
1522    (ins Float32Regs:$a, Float32Regs:$b),
1523                 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1524               [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
1525  def f32ri_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1526    (ins Float32Regs:$a, f32imm:$b),
1527                 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1528               [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
1529  def f32ri_u32: NVPTXInst<(outs Int32Regs:$dst),
1530    (ins Float32Regs:$a, f32imm:$b),
1531                 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1532               [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
1533  def f32ir_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
1534    (ins f32imm:$a, Float32Regs:$b),
1535                 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
1536               [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
1537  def f32ir_u32: NVPTXInst<(outs Int32Regs:$dst),
1538    (ins f32imm:$a, Float32Regs:$b),
1539                 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
1540               [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
1541  def f64rr_u32: NVPTXInst<(outs Int32Regs:$dst),
1542    (ins Float64Regs:$a, Float64Regs:$b),
1543                 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1544               [(set Int32Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
1545  def f64ri_u32: NVPTXInst<(outs Int32Regs:$dst),
1546    (ins Float64Regs:$a, f64imm:$b),
1547                 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1548               [(set Int32Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
1549  def f64ir_u32: NVPTXInst<(outs Int32Regs:$dst),
1550    (ins f64imm:$a, Float64Regs:$b),
1551                 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
1552               [(set Int32Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
1553}
1554
1555defm ISetSGT
1556: ISET_FORMAT<"setp.gt.s", "set.gt.u32.s", setgt, "s16", "cvt.s16.s8">;
1557defm ISetUGT
1558: ISET_FORMAT<"setp.gt.u", "set.gt.u32.u", setugt, "u16", "cvt.u16.u8">;
1559defm ISetSLT
1560: ISET_FORMAT<"setp.lt.s", "set.lt.u32.s", setlt, "s16", "cvt.s16.s8">;
1561defm ISetULT
1562: ISET_FORMAT<"setp.lt.u", "set.lt.u32.u", setult, "u16", "cvt.u16.u8">;
1563defm ISetSGE
1564: ISET_FORMAT<"setp.ge.s", "set.ge.u32.s", setge, "s16", "cvt.s16.s8">;
1565defm ISetUGE
1566: ISET_FORMAT<"setp.ge.u", "set.ge.u32.u", setuge, "u16", "cvt.u16.u8">;
1567defm ISetSLE
1568: ISET_FORMAT<"setp.le.s", "set.le.u32.s", setle, "s16", "cvt.s16.s8">;
1569defm ISetULE
1570: ISET_FORMAT<"setp.le.u", "set.le.u32.u", setule, "u16", "cvt.u16.u8">;
1571defm ISetSEQ
1572: ISET_FORMAT<"setp.eq.s", "set.eq.u32.s", seteq, "s16", "cvt.s16.s8">;
1573defm ISetUEQ
1574: ISET_FORMAT<"setp.eq.u", "set.eq.u32.u", setueq, "u16", "cvt.u16.u8">;
1575defm ISetSNE
1576: ISET_FORMAT<"setp.ne.s", "set.ne.u32.s", setne, "s16", "cvt.s16.s8">;
1577defm ISetUNE
1578: ISET_FORMAT<"setp.ne.u", "set.ne.u32.u", setune, "u16", "cvt.u16.u8">;
1579
1580def ISetSNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1581  (ins Int1Regs:$a, Int1Regs:$b),
1582                      "xor.pred \t$dst, $a, $b;",
1583            [(set Int1Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
1584def ISetUNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1585  (ins Int1Regs:$a, Int1Regs:$b),
1586                      "xor.pred \t$dst, $a, $b;",
1587            [(set Int1Regs:$dst, (setune Int1Regs:$a, Int1Regs:$b))]>;
1588def ISetSEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1589  (ins Int1Regs:$a, Int1Regs:$b),
1590            !strconcat("{{\n\t",
1591            !strconcat(".reg .pred temp;\n\t",
1592            !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1593            !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
1594            [(set Int1Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
1595def ISetUEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
1596  (ins Int1Regs:$a, Int1Regs:$b),
1597            !strconcat("{{\n\t",
1598            !strconcat(".reg .pred temp;\n\t",
1599            !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1600            !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
1601            [(set Int1Regs:$dst, (setueq Int1Regs:$a, Int1Regs:$b))]>;
1602
1603// Compare 2 i1's and produce a u32
1604def ISETSNEi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
1605  (ins Int1Regs:$a, Int1Regs:$b),
1606                  !strconcat("{{\n\t",
1607                  !strconcat(".reg .pred temp;\n\t",
1608                  !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1609                  !strconcat("selp.u32 \t$dst, -1, 0, temp;", "\n\t}}")))),
1610                  [(set Int32Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
1611def ISETSEQi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
1612  (ins Int1Regs:$a, Int1Regs:$b),
1613                  !strconcat("{{\n\t",
1614                  !strconcat(".reg .pred temp;\n\t",
1615                  !strconcat("xor.pred \ttemp, $a, $b;\n\t",
1616                  !strconcat("selp.u32 \t$dst, 0, -1, temp;", "\n\t}}")))),
1617                  [(set Int32Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
1618
1619defm FSetGT : FSET_FORMAT<"setp.gt.", "set.gt.u32.", setogt>;
1620defm FSetLT : FSET_FORMAT<"setp.lt.", "set.lt.u32.", setolt>;
1621defm FSetGE : FSET_FORMAT<"setp.ge.", "set.ge.u32.", setoge>;
1622defm FSetLE : FSET_FORMAT<"setp.le.", "set.le.u32.", setole>;
1623defm FSetEQ : FSET_FORMAT<"setp.eq.", "set.eq.u32.", setoeq>;
1624defm FSetNE : FSET_FORMAT<"setp.ne.", "set.ne.u32.", setone>;
1625
1626defm FSetUGT : FSET_FORMAT<"setp.gtu.", "set.gtu.u32.", setugt>;
1627defm FSetULT : FSET_FORMAT<"setp.ltu.", "set.ltu.u32.",setult>;
1628defm FSetUGE : FSET_FORMAT<"setp.geu.", "set.geu.u32.",setuge>;
1629defm FSetULE : FSET_FORMAT<"setp.leu.", "set.leu.u32.",setule>;
1630defm FSetUEQ : FSET_FORMAT<"setp.equ.", "set.equ.u32.",setueq>;
1631defm FSetUNE : FSET_FORMAT<"setp.neu.", "set.neu.u32.",setune>;
1632
1633defm FSetNUM : FSET_FORMAT<"setp.num.", "set.num.u32.",seto>;
1634defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.",setuo>;
1635
1636def SELECTi1rr : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
1637                     (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
1638                             (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
1639def SELECTi8rr : NVPTXInst<(outs Int8Regs:$dst),
1640  (ins Int8Regs:$a, Int8Regs:$b, Int1Regs:$p),
1641                      "selp.b16 \t$dst, $a, $b, $p;",
1642      [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, Int8Regs:$b))]>;
1643def SELECTi8ri : NVPTXInst<(outs Int8Regs:$dst),
1644  (ins Int8Regs:$a, i8imm:$b, Int1Regs:$p),
1645                      "selp.b16 \t$dst, $a, $b, $p;",
1646      [(set Int8Regs:$dst, (select Int1Regs:$p, Int8Regs:$a, imm:$b))]>;
1647def SELECTi8ir : NVPTXInst<(outs Int8Regs:$dst),
1648  (ins i8imm:$a, Int8Regs:$b, Int1Regs:$p),
1649                      "selp.b16 \t$dst, $a, $b, $p;",
1650      [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, Int8Regs:$b))]>;
1651def SELECTi8ii : NVPTXInst<(outs Int8Regs:$dst),
1652  (ins i8imm:$a, i8imm:$b, Int1Regs:$p),
1653                      "selp.b16 \t$dst, $a, $b, $p;",
1654      [(set Int8Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
1655
1656def SELECTi16rr : NVPTXInst<(outs Int16Regs:$dst),
1657  (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p),
1658                      "selp.b16 \t$dst, $a, $b, $p;",
1659      [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, Int16Regs:$b))]>;
1660def SELECTi16ri : NVPTXInst<(outs Int16Regs:$dst),
1661  (ins Int16Regs:$a, i16imm:$b, Int1Regs:$p),
1662                      "selp.b16 \t$dst, $a, $b, $p;",
1663      [(set Int16Regs:$dst, (select Int1Regs:$p, Int16Regs:$a, imm:$b))]>;
1664def SELECTi16ir : NVPTXInst<(outs Int16Regs:$dst),
1665  (ins i16imm:$a, Int16Regs:$b, Int1Regs:$p),
1666                      "selp.b16 \t$dst, $a, $b, $p;",
1667      [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, Int16Regs:$b))]>;
1668def SELECTi16ii : NVPTXInst<(outs Int16Regs:$dst),
1669  (ins i16imm:$a, i16imm:$b, Int1Regs:$p),
1670                      "selp.b16 \t$dst, $a, $b, $p;",
1671      [(set Int16Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
1672
1673def SELECTi32rr : NVPTXInst<(outs Int32Regs:$dst),
1674  (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p),
1675                      "selp.b32 \t$dst, $a, $b, $p;",
1676      [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, Int32Regs:$b))]>;
1677def SELECTi32ri : NVPTXInst<(outs Int32Regs:$dst),
1678  (ins Int32Regs:$a, i32imm:$b, Int1Regs:$p),
1679                      "selp.b32 \t$dst, $a, $b, $p;",
1680      [(set Int32Regs:$dst, (select Int1Regs:$p, Int32Regs:$a, imm:$b))]>;
1681def SELECTi32ir : NVPTXInst<(outs Int32Regs:$dst),
1682  (ins i32imm:$a, Int32Regs:$b, Int1Regs:$p),
1683                      "selp.b32 \t$dst, $a, $b, $p;",
1684      [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, Int32Regs:$b))]>;
1685def SELECTi32ii : NVPTXInst<(outs Int32Regs:$dst),
1686  (ins i32imm:$a, i32imm:$b, Int1Regs:$p),
1687                      "selp.b32 \t$dst, $a, $b, $p;",
1688      [(set Int32Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
1689
1690def SELECTi64rr : NVPTXInst<(outs Int64Regs:$dst),
1691  (ins Int64Regs:$a, Int64Regs:$b, Int1Regs:$p),
1692                      "selp.b64 \t$dst, $a, $b, $p;",
1693      [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, Int64Regs:$b))]>;
1694def SELECTi64ri : NVPTXInst<(outs Int64Regs:$dst),
1695  (ins Int64Regs:$a, i64imm:$b, Int1Regs:$p),
1696                      "selp.b64 \t$dst, $a, $b, $p;",
1697      [(set Int64Regs:$dst, (select Int1Regs:$p, Int64Regs:$a, imm:$b))]>;
1698def SELECTi64ir : NVPTXInst<(outs Int64Regs:$dst),
1699  (ins i64imm:$a, Int64Regs:$b, Int1Regs:$p),
1700                      "selp.b64 \t$dst, $a, $b, $p;",
1701      [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, Int64Regs:$b))]>;
1702def SELECTi64ii : NVPTXInst<(outs Int64Regs:$dst),
1703  (ins i64imm:$a, i64imm:$b, Int1Regs:$p),
1704                      "selp.b64 \t$dst, $a, $b, $p;",
1705      [(set Int64Regs:$dst, (select Int1Regs:$p, imm:$a, imm:$b))]>;
1706
1707def SELECTf32rr : NVPTXInst<(outs Float32Regs:$dst),
1708  (ins Float32Regs:$a, Float32Regs:$b, Int1Regs:$p),
1709                      "selp.f32 \t$dst, $a, $b, $p;",
1710      [(set Float32Regs:$dst,
1711        (select Int1Regs:$p, Float32Regs:$a, Float32Regs:$b))]>;
1712def SELECTf32ri : NVPTXInst<(outs Float32Regs:$dst),
1713  (ins Float32Regs:$a, f32imm:$b, Int1Regs:$p),
1714                      "selp.f32 \t$dst, $a, $b, $p;",
1715      [(set Float32Regs:$dst, (select Int1Regs:$p, Float32Regs:$a, fpimm:$b))]>;
1716def SELECTf32ir : NVPTXInst<(outs Float32Regs:$dst),
1717  (ins f32imm:$a, Float32Regs:$b, Int1Regs:$p),
1718                      "selp.f32 \t$dst, $a, $b, $p;",
1719      [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float32Regs:$b))]>;
1720def SELECTf32ii : NVPTXInst<(outs Float32Regs:$dst),
1721  (ins f32imm:$a, f32imm:$b, Int1Regs:$p),
1722                      "selp.f32 \t$dst, $a, $b, $p;",
1723      [(set Float32Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
1724
1725def SELECTf64rr : NVPTXInst<(outs Float64Regs:$dst),
1726  (ins Float64Regs:$a, Float64Regs:$b, Int1Regs:$p),
1727                      "selp.f64 \t$dst, $a, $b, $p;",
1728      [(set Float64Regs:$dst,
1729        (select Int1Regs:$p, Float64Regs:$a, Float64Regs:$b))]>;
1730def SELECTf64ri : NVPTXInst<(outs Float64Regs:$dst),
1731  (ins Float64Regs:$a, f64imm:$b, Int1Regs:$p),
1732                      "selp.f64 \t$dst, $a, $b, $p;",
1733      [(set Float64Regs:$dst, (select Int1Regs:$p, Float64Regs:$a, fpimm:$b))]>;
1734def SELECTf64ir : NVPTXInst<(outs Float64Regs:$dst),
1735  (ins f64imm:$a, Float64Regs:$b, Int1Regs:$p),
1736                      "selp.f64 \t$dst, $a, $b, $p;",
1737      [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, Float64Regs:$b))]>;
1738def SELECTf64ii : NVPTXInst<(outs Float64Regs:$dst),
1739  (ins f64imm:$a, f64imm:$b, Int1Regs:$p),
1740                      "selp.f64 \t $dst, $a, $b, $p;",
1741      [(set Float64Regs:$dst, (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
1742
1743//def ld_param         : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
1744//                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
1745
1746def SDTDeclareParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
1747  SDTCisInt<2>]>;
1748def SDTDeclareScalarParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>,
1749  SDTCisInt<1>, SDTCisInt<2>]>;
1750def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
1751def SDTPrintCallProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
1752def SDTPrintCallUniProfile : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
1753def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
1754def SDTStoreParam32Profile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
1755def SDTCallArgProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
1756def SDTCallArgMarkProfile : SDTypeProfile<0, 0, []>;
1757def SDTCallVoidProfile : SDTypeProfile<0, 1, []>;
1758def SDTCallValProfile : SDTypeProfile<1, 0, []>;
1759def SDTMoveParamProfile : SDTypeProfile<1, 1, []>;
1760def SDTMoveRetvalProfile : SDTypeProfile<0, 1, []>;
1761def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
1762def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
1763
1764def DeclareParam : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
1765                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1766def DeclareScalarParam : SDNode<"NVPTXISD::DeclareScalarParam",
1767  SDTDeclareScalarParamProfile,
1768                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1769def DeclareRetParam : SDNode<"NVPTXISD::DeclareRetParam",
1770  SDTDeclareParamProfile,
1771                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1772def DeclareRet   : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
1773                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1774def LoadParam    : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
1775                         [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
1776def PrintCall    : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
1777                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1778def PrintCallUni : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
1779                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1780def StoreParam   : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
1781                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1782def StoreParamU32 : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
1783                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1784def StoreParamS32 : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
1785                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1786def MoveToParam  : SDNode<"NVPTXISD::MoveToParam", SDTStoreParamProfile,
1787                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1788def CallArgBegin : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
1789                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1790def CallArg      : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
1791                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1792def LastCallArg  : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
1793                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1794def CallArgEnd   : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
1795                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1796def CallVoid     : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
1797                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1798def Prototype    : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
1799                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1800def CallVal      : SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
1801                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1802def MoveParam    : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile,
1803                         []>;
1804def MoveRetval   : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile,
1805                         [SDNPHasChain, SDNPSideEffect]>;
1806def StoreRetval  : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
1807                         [SDNPHasChain, SDNPSideEffect]>;
1808def MoveToRetval : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile,
1809                         [SDNPHasChain, SDNPSideEffect]>;
1810def PseudoUseParam : SDNode<"NVPTXISD::PseudoUseParam",
1811  SDTPseudoUseParamProfile,
1812                       [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1813def RETURNNode   : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
1814                         [SDNPHasChain, SDNPSideEffect]>;
1815
1816class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
1817      NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
1818                !strconcat(!strconcat("ld.param", opstr),
1819                "\t$dst, [retval0+$b];"),
1820                [(set regclass:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
1821
1822class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
1823      NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
1824                !strconcat(!strconcat("mov", opstr),
1825                "\t$dst, retval$b;"),
1826                [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
1827
1828class StoreParamInst<NVPTXRegClass regclass, string opstr> :
1829      NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
1830                !strconcat(!strconcat("st.param", opstr),
1831                "\t[param$a+$b], $val;"),
1832                [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
1833
1834class MoveToParamInst<NVPTXRegClass regclass, string opstr> :
1835      NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
1836                !strconcat(!strconcat("mov", opstr),
1837                "\tparam$a, $val;"),
1838                [(MoveToParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
1839
1840class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
1841      NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
1842                !strconcat(!strconcat("st.param", opstr),
1843                "\t[func_retval0+$a], $val;"),
1844                [(StoreRetval (i32 imm:$a), regclass:$val)]>;
1845
1846class MoveToRetvalInst<NVPTXRegClass regclass, string opstr> :
1847      NVPTXInst<(outs), (ins i32imm:$num, regclass:$val),
1848                !strconcat(!strconcat("mov", opstr),
1849                "\tfunc_retval$num, $val;"),
1850                [(MoveToRetval (i32 imm:$num), regclass:$val)]>;
1851
1852class MoveRetvalInst<NVPTXRegClass regclass, string opstr> :
1853      NVPTXInst<(outs), (ins regclass:$val),
1854                !strconcat(!strconcat("mov", opstr),
1855                "\tfunc_retval0, $val;"),
1856                [(MoveRetval regclass:$val)]>;
1857
1858def PrintCallRetInst1 : NVPTXInst<(outs), (ins),
1859"call (retval0), ",
1860                                [(PrintCall (i32 1))]>;
1861def PrintCallRetInst2 : NVPTXInst<(outs), (ins),
1862"call (retval0, retval1), ",
1863                                [(PrintCall (i32 2))]>;
1864def PrintCallRetInst3 : NVPTXInst<(outs), (ins),
1865"call (retval0, retval1, retval2), ",
1866                                [(PrintCall (i32 3))]>;
1867def PrintCallRetInst4 : NVPTXInst<(outs), (ins),
1868"call (retval0, retval1, retval2, retval3), ",
1869                                [(PrintCall (i32 4))]>;
1870def PrintCallRetInst5 : NVPTXInst<(outs), (ins),
1871"call (retval0, retval1, retval2, retval3, retval4), ",
1872                                [(PrintCall (i32 5))]>;
1873def PrintCallRetInst6 : NVPTXInst<(outs), (ins),
1874"call (retval0, retval1, retval2, retval3, retval4, retval5), ",
1875                                [(PrintCall (i32 6))]>;
1876def PrintCallRetInst7 : NVPTXInst<(outs), (ins),
1877"call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
1878                                [(PrintCall (i32 7))]>;
1879def PrintCallRetInst8 : NVPTXInst<(outs), (ins),
1880!strconcat("call (retval0, retval1, retval2, retval3, retval4",
1881           ", retval5, retval6, retval7), "),
1882                                [(PrintCall (i32 8))]>;
1883
1884def PrintCallNoRetInst : NVPTXInst<(outs), (ins), "call ",
1885                                [(PrintCall (i32 0))]>;
1886
1887def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins),
1888"call.uni (retval0), ",
1889                                [(PrintCallUni (i32 1))]>;
1890def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins),
1891"call.uni (retval0, retval1), ",
1892                                [(PrintCallUni (i32 2))]>;
1893def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins),
1894"call.uni (retval0, retval1, retval2), ",
1895                                [(PrintCallUni (i32 3))]>;
1896def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins),
1897"call.uni (retval0, retval1, retval2, retval3), ",
1898                                [(PrintCallUni (i32 4))]>;
1899def PrintCallUniRetInst5 : NVPTXInst<(outs), (ins),
1900"call.uni (retval0, retval1, retval2, retval3, retval4), ",
1901                                [(PrintCallUni (i32 5))]>;
1902def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins),
1903"call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
1904                                [(PrintCallUni (i32 6))]>;
1905def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins),
1906"call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
1907                                [(PrintCallUni (i32 7))]>;
1908def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins),
1909!strconcat("call.uni (retval0, retval1, retval2, retval3, retval4",
1910           ", retval5, retval6, retval7), "),
1911                                [(PrintCallUni (i32 8))]>;
1912
1913def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ",
1914                                [(PrintCallUni (i32 0))]>;
1915
1916def LoadParamMemI64    : LoadParamMemInst<Int64Regs, ".b64">;
1917def LoadParamMemI32    : LoadParamMemInst<Int32Regs, ".b32">;
1918def LoadParamMemI16    : LoadParamMemInst<Int16Regs, ".b16">;
1919def LoadParamMemI8     : LoadParamMemInst<Int8Regs, ".b8">;
1920
1921//def LoadParamMemI16    : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
1922//                !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
1923//                "cvt.u16.u32\t$dst, temp_param_reg;"),
1924//                [(set Int16Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
1925//def LoadParamMemI8     : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
1926//                !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
1927//                "cvt.u16.u32\t$dst, temp_param_reg;"),
1928//                [(set Int8Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
1929
1930def LoadParamMemF32    : LoadParamMemInst<Float32Regs, ".f32">;
1931def LoadParamMemF64    : LoadParamMemInst<Float64Regs, ".f64">;
1932
1933def LoadParamRegI64    : LoadParamRegInst<Int64Regs, ".b64">;
1934def LoadParamRegI32    : LoadParamRegInst<Int32Regs, ".b32">;
1935def LoadParamRegI16    : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
1936                         "cvt.u16.u32\t$dst, retval$b;",
1937                         [(set Int16Regs:$dst,
1938                           (LoadParam (i32 0), (i32 imm:$b)))]>;
1939def LoadParamRegI8     : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
1940                         "cvt.u16.u32\t$dst, retval$b;",
1941                         [(set Int8Regs:$dst,
1942                           (LoadParam (i32 0), (i32 imm:$b)))]>;
1943
1944def LoadParamRegF32    : LoadParamRegInst<Float32Regs, ".f32">;
1945def LoadParamRegF64    : LoadParamRegInst<Float64Regs, ".f64">;
1946
1947def StoreParamI64    : StoreParamInst<Int64Regs, ".b64">;
1948def StoreParamI32    : StoreParamInst<Int32Regs, ".b32">;
1949
1950def StoreParamI16    : NVPTXInst<(outs),
1951  (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
1952                       "st.param.b16\t[param$a+$b], $val;",
1953           [(StoreParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
1954
1955def StoreParamI8     : NVPTXInst<(outs),
1956  (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
1957                       "st.param.b8\t[param$a+$b], $val;",
1958                       [(StoreParam
1959                         (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
1960
1961def StoreParamS32I16 : NVPTXInst<(outs),
1962  (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
1963                 !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t",
1964                            "st.param.b32\t[param$a+$b], temp_param_reg;"),
1965                 [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
1966def StoreParamU32I16 : NVPTXInst<(outs),
1967  (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
1968                 !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
1969                            "st.param.b32\t[param$a+$b], temp_param_reg;"),
1970                 [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
1971
1972def StoreParamU32I8   : NVPTXInst<(outs),
1973  (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
1974                 !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t",
1975                            "st.param.b32\t[param$a+$b], temp_param_reg;"),
1976                 [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
1977def StoreParamS32I8   : NVPTXInst<(outs),
1978  (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
1979                 !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t",
1980                            "st.param.b32\t[param$a+$b], temp_param_reg;"),
1981                 [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
1982
1983def StoreParamF32    : StoreParamInst<Float32Regs, ".f32">;
1984def StoreParamF64    : StoreParamInst<Float64Regs, ".f64">;
1985
1986def MoveToParamI64   : MoveToParamInst<Int64Regs, ".b64">;
1987def MoveToParamI32   : MoveToParamInst<Int32Regs, ".b32">;
1988def MoveToParamF64   : MoveToParamInst<Float64Regs, ".f64">;
1989def MoveToParamF32   : MoveToParamInst<Float32Regs, ".f32">;
1990def MoveToParamI16   : NVPTXInst<(outs),
1991  (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
1992                   !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
1993                              "mov.b32\tparam$a, temp_param_reg;"),
1994                   [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
1995def MoveToParamI8    : NVPTXInst<(outs),
1996  (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
1997                   !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
1998                              "mov.b32\tparam$a, temp_param_reg;"),
1999                   [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
2000
2001def StoreRetvalI64    : StoreRetvalInst<Int64Regs, ".b64">;
2002def StoreRetvalI32    : StoreRetvalInst<Int32Regs, ".b32">;
2003def StoreRetvalI16    : StoreRetvalInst<Int16Regs, ".b16">;
2004def StoreRetvalI8     : StoreRetvalInst<Int8Regs, ".b8">;
2005
2006//def StoreRetvalI16    : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a),
2007//     !strconcat("\{\n\t",
2008//     !strconcat(".reg .b32 temp_retval_reg;\n\t",
2009//     !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
2010//                "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
2011//     [(StoreRetval (i32 imm:$a), Int16Regs:$val)]>;
2012//def StoreRetvalI8     : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a),
2013//     !strconcat("\{\n\t",
2014//     !strconcat(".reg .b32 temp_retval_reg;\n\t",
2015//     !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
2016//                "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
2017//     [(StoreRetval (i32 imm:$a), Int8Regs:$val)]>;
2018
2019def StoreRetvalF64    : StoreRetvalInst<Float64Regs, ".f64">;
2020def StoreRetvalF32    : StoreRetvalInst<Float32Regs, ".f32">;
2021
2022def MoveRetvalI64    : MoveRetvalInst<Int64Regs, ".b64">;
2023def MoveRetvalI32    : MoveRetvalInst<Int32Regs, ".b32">;
2024def MoveRetvalI16    : MoveRetvalInst<Int16Regs, ".b16">;
2025def MoveRetvalI8     : MoveRetvalInst<Int8Regs, ".b8">;
2026def MoveRetvalF64    : MoveRetvalInst<Float64Regs, ".f64">;
2027def MoveRetvalF32    : MoveRetvalInst<Float32Regs, ".f32">;
2028
2029def MoveToRetvalI64    : MoveToRetvalInst<Int64Regs, ".b64">;
2030def MoveToRetvalI32    : MoveToRetvalInst<Int32Regs, ".b32">;
2031def MoveToRetvalF64    : MoveToRetvalInst<Float64Regs, ".f64">;
2032def MoveToRetvalF32    : MoveToRetvalInst<Float32Regs, ".f32">;
2033def MoveToRetvalI16    : NVPTXInst<(outs), (ins i32imm:$num, Int16Regs:$val),
2034                         "cvt.u32.u16\tfunc_retval$num, $val;",
2035                         [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>;
2036def MoveToRetvalI8     : NVPTXInst<(outs), (ins i32imm:$num, Int8Regs:$val),
2037                         "cvt.u32.u16\tfunc_retval$num, $val;",
2038                         [(MoveToRetval (i32 imm:$num), Int8Regs:$val)]>;
2039
2040def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
2041def CallArgEndInst1  : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
2042def CallArgEndInst0  : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
2043def RETURNInst       : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
2044
2045class CallArgInst<NVPTXRegClass regclass> :
2046      NVPTXInst<(outs), (ins regclass:$a), "$a, ",
2047                [(CallArg (i32 0), regclass:$a)]>;
2048
2049class LastCallArgInst<NVPTXRegClass regclass> :
2050      NVPTXInst<(outs), (ins regclass:$a), "$a",
2051                [(LastCallArg (i32 0), regclass:$a)]>;
2052
2053def CallArgI64     : CallArgInst<Int64Regs>;
2054def CallArgI32     : CallArgInst<Int32Regs>;
2055def CallArgI16     : CallArgInst<Int16Regs>;
2056def CallArgI8      : CallArgInst<Int8Regs>;
2057
2058def CallArgF64     : CallArgInst<Float64Regs>;
2059def CallArgF32     : CallArgInst<Float32Regs>;
2060
2061def LastCallArgI64 : LastCallArgInst<Int64Regs>;
2062def LastCallArgI32 : LastCallArgInst<Int32Regs>;
2063def LastCallArgI16 : LastCallArgInst<Int16Regs>;
2064def LastCallArgI8  : LastCallArgInst<Int8Regs>;
2065
2066def LastCallArgF64 : LastCallArgInst<Float64Regs>;
2067def LastCallArgF32 : LastCallArgInst<Float32Regs>;
2068
2069def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ",
2070                              [(CallArg (i32 0), (i32 imm:$a))]>;
2071def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a",
2072                              [(LastCallArg (i32 0), (i32 imm:$a))]>;
2073
2074def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
2075                             [(CallArg (i32 1), (i32 imm:$a))]>;
2076def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
2077                             [(LastCallArg (i32 1), (i32 imm:$a))]>;
2078
2079def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr),
2080                             "$addr, ",
2081                             [(CallVoid (Wrapper tglobaladdr:$addr))]>;
2082def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr),
2083                             "$addr, ",
2084                             [(CallVoid Int32Regs:$addr)]>;
2085def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
2086                             "$addr, ",
2087                             [(CallVoid Int64Regs:$addr)]>;
2088def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val),
2089                             ", prototype_$val;",
2090                             [(Prototype (i32 imm:$val))]>;
2091
2092def DeclareRetMemInst : NVPTXInst<(outs),
2093  (ins i32imm:$align, i32imm:$size, i32imm:$num),
2094         ".param .align $align .b8 retval$num[$size];",
2095         [(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>;
2096def DeclareRetScalarInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
2097         ".param .b$size retval$num;",
2098         [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
2099def DeclareRetRegInst : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
2100         ".reg .b$size retval$num;",
2101         [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
2102
2103def DeclareParamInst : NVPTXInst<(outs),
2104  (ins i32imm:$align, i32imm:$a, i32imm:$size),
2105         ".param .align $align .b8 param$a[$size];",
2106         [(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>;
2107def DeclareScalarParamInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
2108         ".param .b$size param$a;",
2109         [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
2110def DeclareScalarRegInst : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
2111         ".reg .b$size param$a;",
2112         [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
2113
2114class MoveParamInst<NVPTXRegClass regclass, string asmstr> :
2115      NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
2116                !strconcat(!strconcat("mov", asmstr), "\t$dst, $src;"),
2117                [(set regclass:$dst, (MoveParam regclass:$src))]>;
2118
2119def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
2120def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
2121def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
2122                   "cvt.u16.u32\t$dst, $src;",
2123                   [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
2124def MoveParamI8  : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
2125                   "cvt.u16.u32\t$dst, $src;",
2126                   [(set Int8Regs:$dst, (MoveParam Int8Regs:$src))]>;
2127def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
2128def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
2129
2130class PseudoUseParamInst<NVPTXRegClass regclass> :
2131      NVPTXInst<(outs), (ins regclass:$src),
2132      "// Pseudo use of $src",
2133      [(PseudoUseParam regclass:$src)]>;
2134
2135def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
2136def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
2137def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
2138def PseudoUseParamI8  : PseudoUseParamInst<Int8Regs>;
2139def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
2140def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
2141
2142
2143//
2144// Load / Store Handling
2145//
2146multiclass LD<NVPTXRegClass regclass> {
2147  def _avar : NVPTXInst<(outs regclass:$dst),
2148    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2149      i32imm:$fromWidth, imem:$addr),
2150!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2151           "$fromWidth \t$dst, [$addr];"), []>;
2152  def _areg : NVPTXInst<(outs regclass:$dst),
2153    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2154      i32imm:$fromWidth, Int32Regs:$addr),
2155!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2156           "$fromWidth \t$dst, [$addr];"), []>;
2157  def _areg_64 : NVPTXInst<(outs regclass:$dst),
2158    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2159     i32imm:$fromWidth, Int64Regs:$addr),
2160     !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
2161                " \t$dst, [$addr];"), []>;
2162  def _ari : NVPTXInst<(outs regclass:$dst),
2163    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2164      i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2165!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2166           "$fromWidth \t$dst, [$addr+$offset];"), []>;
2167  def _ari_64 : NVPTXInst<(outs regclass:$dst),
2168    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2169     i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2170    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
2171               " \t$dst, [$addr+$offset];"), []>;
2172  def _asi : NVPTXInst<(outs regclass:$dst),
2173    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2174      i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2175!strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2176           "$fromWidth \t$dst, [$addr+$offset];"), []>;
2177}
2178
2179let mayLoad=1, neverHasSideEffects=1 in {
2180defm LD_i8  : LD<Int8Regs>;
2181defm LD_i16 : LD<Int16Regs>;
2182defm LD_i32 : LD<Int32Regs>;
2183defm LD_i64 : LD<Int64Regs>;
2184defm LD_f32 : LD<Float32Regs>;
2185defm LD_f64 : LD<Float64Regs>;
2186}
2187
2188multiclass ST<NVPTXRegClass regclass> {
2189  def _avar : NVPTXInst<(outs),
2190    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2191      LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
2192!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2193           " \t[$addr], $src;"), []>;
2194  def _areg : NVPTXInst<(outs),
2195    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2196      LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
2197!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2198           " \t[$addr], $src;"), []>;
2199  def _areg_64 : NVPTXInst<(outs),
2200    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2201     LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
2202  !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
2203               "\t[$addr], $src;"), []>;
2204  def _ari : NVPTXInst<(outs),
2205    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2206      LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
2207!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2208           " \t[$addr+$offset], $src;"), []>;
2209  def _ari_64 : NVPTXInst<(outs),
2210    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2211     LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
2212  !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
2213               "\t[$addr+$offset], $src;"), []>;
2214  def _asi : NVPTXInst<(outs),
2215    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
2216      LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
2217!strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
2218           " \t[$addr+$offset], $src;"), []>;
2219}
2220
2221let mayStore=1, neverHasSideEffects=1 in {
2222defm ST_i8  : ST<Int8Regs>;
2223defm ST_i16 : ST<Int16Regs>;
2224defm ST_i32 : ST<Int32Regs>;
2225defm ST_i64 : ST<Int64Regs>;
2226defm ST_f32 : ST<Float32Regs>;
2227defm ST_f64 : ST<Float64Regs>;
2228}
2229
2230// The following is used only in and after vector elementizations.
2231// Vector elementization happens at the machine instruction level, so the
2232// following instruction
2233// never appears in the DAG.
2234multiclass LD_VEC<NVPTXRegClass regclass> {
2235  def _v2_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2236    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2237      i32imm:$fromWidth, imem:$addr),
2238    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2239               "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2240  def _v2_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2241    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2242      i32imm:$fromWidth, Int32Regs:$addr),
2243    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2244               "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2245  def _v2_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2246    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2247     i32imm:$fromWidth, Int64Regs:$addr),
2248    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2249               "$fromWidth \t{{$dst1, $dst2}}, [$addr];"), []>;
2250  def _v2_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2251    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2252      i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2253    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2254               "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2255  def _v2_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2256    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2257     i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2258    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2259               "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2260  def _v2_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2261    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2262      i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2263    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2264               "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"), []>;
2265  def _v4_avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2266      regclass:$dst3, regclass:$dst4),
2267    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2268      i32imm:$fromWidth, imem:$addr),
2269    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2270               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2271  def _v4_areg : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2272      regclass:$dst4),
2273    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2274      i32imm:$fromWidth, Int32Regs:$addr),
2275    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2276               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2277  def _v4_areg_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2278                               regclass:$dst3, regclass:$dst4),
2279    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2280     i32imm:$fromWidth, Int64Regs:$addr),
2281    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2282               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"), []>;
2283  def _v4_ari : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2284      regclass:$dst4),
2285    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2286      i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2287    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2288               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
2289                []>;
2290  def _v4_ari_64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
2291                              regclass:$dst3, regclass:$dst4),
2292    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2293     i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2294    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2295               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
2296    []>;
2297  def _v4_asi : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2298      regclass:$dst4),
2299    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2300      i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2301    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2302               "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
2303                []>;
2304}
2305let mayLoad=1, neverHasSideEffects=1 in {
2306defm LDV_i8  : LD_VEC<Int8Regs>;
2307defm LDV_i16 : LD_VEC<Int16Regs>;
2308defm LDV_i32 : LD_VEC<Int32Regs>;
2309defm LDV_i64 : LD_VEC<Int64Regs>;
2310defm LDV_f32 : LD_VEC<Float32Regs>;
2311defm LDV_f64 : LD_VEC<Float64Regs>;
2312}
2313
2314multiclass ST_VEC<NVPTXRegClass regclass> {
2315  def _v2_avar : NVPTXInst<(outs),
2316    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2317      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
2318    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2319               "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2320  def _v2_areg : NVPTXInst<(outs),
2321    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2322      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
2323    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2324               "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2325  def _v2_areg_64 : NVPTXInst<(outs),
2326    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2327     LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
2328    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2329               "$fromWidth \t[$addr], {{$src1, $src2}};"), []>;
2330  def _v2_ari : NVPTXInst<(outs),
2331    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2332      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
2333      i32imm:$offset),
2334    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2335               "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2336  def _v2_ari_64 : NVPTXInst<(outs),
2337    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2338     LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
2339     i32imm:$offset),
2340    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2341               "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2342  def _v2_asi : NVPTXInst<(outs),
2343    (ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
2344      LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
2345      i32imm:$offset),
2346    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2347               "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"), []>;
2348  def _v4_avar : NVPTXInst<(outs),
2349    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2350      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2351      i32imm:$fromWidth, imem:$addr),
2352    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2353               "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2354  def _v4_areg : NVPTXInst<(outs),
2355    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2356      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2357      i32imm:$fromWidth, Int32Regs:$addr),
2358    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2359               "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2360  def _v4_areg_64 : NVPTXInst<(outs),
2361    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2362     LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2363     i32imm:$fromWidth, Int64Regs:$addr),
2364    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2365               "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"), []>;
2366  def _v4_ari : NVPTXInst<(outs),
2367    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2368      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2369      i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
2370    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2371               "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
2372    []>;
2373  def _v4_ari_64 : NVPTXInst<(outs),
2374    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2375     LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2376     i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
2377    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2378               "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
2379     []>;
2380  def _v4_asi : NVPTXInst<(outs),
2381    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
2382      LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
2383      i32imm:$fromWidth, imem:$addr, i32imm:$offset),
2384    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
2385               "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
2386    []>;
2387}
2388let mayStore=1, neverHasSideEffects=1 in {
2389defm STV_i8  : ST_VEC<Int8Regs>;
2390defm STV_i16 : ST_VEC<Int16Regs>;
2391defm STV_i32 : ST_VEC<Int32Regs>;
2392defm STV_i64 : ST_VEC<Int64Regs>;
2393defm STV_f32 : ST_VEC<Float32Regs>;
2394defm STV_f64 : ST_VEC<Float64Regs>;
2395}
2396
2397
2398//---- Conversion ----
2399
2400multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
2401// FIXME: need to add f16 support
2402//  def CVTf16i8 :
2403//    NVPTXInst<(outs Float16Regs:$d), (ins Int8Regs:$a),
2404//              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "8 \t$d, $a;"),
2405//        [(set Float16Regs:$d, (OpNode Int8Regs:$a))]>;
2406//  def CVTf16i16 :
2407//    NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a),
2408//              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"),
2409//        [(set Float16Regs:$d, (OpNode Int16Regs:$a))]>;
2410//  def CVTf16i32 :
2411//    NVPTXInst<(outs Float16Regs:$d), (ins Int32Regs:$a),
2412//              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "32 \t$d, $a;"),
2413//        [(set Float16Regs:$d, (OpNode Int32Regs:$a))]>;
2414//  def CVTf16i64:
2415//    NVPTXInst<(outs Float16Regs:$d), (ins Int64Regs:$a),
2416//          !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
2417//            [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;
2418
2419  def CVTf32i1 :
2420    NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a),
2421              "selp.f32 \t$d, 1.0, 0.0, $a;",
2422        [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>;
2423  def CVTf32i8 :
2424    NVPTXInst<(outs Float32Regs:$d), (ins Int8Regs:$a),
2425              !strconcat(!strconcat("cvt.rn.f32.", OpStr), "8 \t$d, $a;"),
2426        [(set Float32Regs:$d, (OpNode Int8Regs:$a))]>;
2427  def CVTf32i16 :
2428    NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a),
2429              !strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"),
2430        [(set Float32Regs:$d, (OpNode Int16Regs:$a))]>;
2431  def CVTf32i32 :
2432    NVPTXInst<(outs Float32Regs:$d), (ins Int32Regs:$a),
2433              !strconcat(!strconcat("cvt.rn.f32.", OpStr), "32 \t$d, $a;"),
2434        [(set Float32Regs:$d, (OpNode Int32Regs:$a))]>;
2435  def CVTf32i64:
2436    NVPTXInst<(outs Float32Regs:$d), (ins Int64Regs:$a),
2437          !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
2438            [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;
2439
2440  def CVTf64i1 :
2441    NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a),
2442              "selp.f64 \t$d, 1.0, 0.0, $a;",
2443        [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>;
2444  def CVTf64i8 :
2445    NVPTXInst<(outs Float64Regs:$d), (ins Int8Regs:$a),
2446              !strconcat(!strconcat("cvt.rn.f64.", OpStr), "8 \t$d, $a;"),
2447        [(set Float64Regs:$d, (OpNode Int8Regs:$a))]>;
2448  def CVTf64i16 :
2449    NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a),
2450              !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"),
2451        [(set Float64Regs:$d, (OpNode Int16Regs:$a))]>;
2452  def CVTf64i32 :
2453    NVPTXInst<(outs Float64Regs:$d), (ins Int32Regs:$a),
2454              !strconcat(!strconcat("cvt.rn.f64.", OpStr), "32 \t$d, $a;"),
2455        [(set Float64Regs:$d, (OpNode Int32Regs:$a))]>;
2456  def CVTf64i64:
2457    NVPTXInst<(outs Float64Regs:$d), (ins Int64Regs:$a),
2458          !strconcat(!strconcat("cvt.rn.f64.", OpStr), "64 \t$d, $a;"),
2459            [(set Float64Regs:$d, (OpNode Int64Regs:$a))]>;
2460}
2461
2462defm Sint_to_fp : CVT_INT_TO_FP <"s", sint_to_fp>;
2463defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>;
2464
2465multiclass CVT_FP_TO_INT <string OpStr, SDNode OpNode> {
2466// FIXME: need to add f16 support
2467//  def CVTi8f16:
2468//    NVPTXInst<(outs Int8Regs:$d), (ins Float16Regs:$a),
2469//              !strconcat(!strconcat("cvt.rzi.", OpStr), "8.f16 $d, $a;"),
2470//        [(set Int8Regs:$d, (OpNode Float16Regs:$a))]>;
2471  def CVTi8f32_ftz:
2472    NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
2473              !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
2474        [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2475  def CVTi8f32:
2476    NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
2477              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
2478        [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>;
2479  def CVTi8f64:
2480    NVPTXInst<(outs Int8Regs:$d), (ins Float64Regs:$a),
2481              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
2482        [(set Int8Regs:$d, (OpNode Float64Regs:$a))]>;
2483
2484// FIXME: need to add f16 support
2485//  def CVTi16f16:
2486//    NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a),
2487//              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"),
2488//        [(set Int16Regs:$d, (OpNode Float16Regs:$a))]>;
2489  def CVTi16f32_ftz:
2490    NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
2491              !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
2492        [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2493  def CVTi16f32:
2494    NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
2495              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
2496        [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>;
2497  def CVTi16f64:
2498    NVPTXInst<(outs Int16Regs:$d), (ins Float64Regs:$a),
2499              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
2500        [(set Int16Regs:$d, (OpNode Float64Regs:$a))]>;
2501
2502// FIXME: need to add f16 support
2503//  def CVTi32f16:  def CVTi32f16:
2504//    NVPTXInst<(outs Int32Regs:$d), (ins Float16Regs:$a),
2505//              !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f16 \t$d, $a;"),
2506//        [(set Int32Regs:$d, (OpNode Float16Regs:$a))]>;
2507  def CVTi32f32_ftz:
2508    NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
2509              !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "32.f32 \t$d, $a;"),
2510        [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2511  def CVTi32f32:
2512    NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
2513              !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f32 \t$d, $a;"),
2514        [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>;
2515  def CVTi32f64:
2516    NVPTXInst<(outs Int32Regs:$d), (ins Float64Regs:$a),
2517              !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f64 \t$d, $a;"),
2518        [(set Int32Regs:$d, (OpNode Float64Regs:$a))]>;
2519
2520// FIXME: need to add f16 support
2521//  def CVTi64f16:
2522//    NVPTXInst<(outs Int64Regs:$d), (ins Float16Regs:$a),
2523//              !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f16 \t$d, $a;"),
2524//        [(set Int64Regs:$d, (OpNode Float16Regs:$a))]>;
2525  def CVTi64f32_ftz:
2526    NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
2527              !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "64.f32 \t$d, $a;"),
2528        [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2529  def CVTi64f32:
2530    NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
2531              !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f32 \t$d, $a;"),
2532        [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>;
2533  def CVTi64f64:
2534    NVPTXInst<(outs Int64Regs:$d), (ins Float64Regs:$a),
2535              !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f64 \t$d, $a;"),
2536        [(set Int64Regs:$d, (OpNode Float64Regs:$a))]>;
2537}
2538
2539defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>;
2540defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>;
2541
2542multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> {
2543  def ext1to8:
2544       NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
2545           "selp.u16 \t$d, 1, 0, $a;",
2546     [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
2547  def ext1to16:
2548       NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
2549           "selp.u16 \t$d, 1, 0, $a;",
2550     [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
2551  def ext1to32:
2552       NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
2553           "selp.u32 \t$d, 1, 0, $a;",
2554     [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
2555  def ext1to64:
2556       NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
2557           "selp.u64 \t$d, 1, 0, $a;",
2558     [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
2559}
2560
2561multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> {
2562  def ext1to8:
2563       NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
2564           "selp.s16 \t$d, -1, 0, $a;",
2565     [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
2566  def ext1to16:
2567       NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
2568           "selp.s16 \t$d, -1, 0, $a;",
2569     [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
2570  def ext1to32:
2571       NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
2572           "selp.s32 \t$d, -1, 0, $a;",
2573     [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
2574  def ext1to64:
2575       NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
2576           "selp.s64 \t$d, -1, 0, $a;",
2577     [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
2578}
2579
2580multiclass INT_EXTEND <string OpStr, SDNode OpNode> {
2581  // All Int8Regs are emiited as 16bit registers in ptx.
2582  // And there is no selp.u8 in ptx.
2583  def ext8to16:
2584       NVPTXInst<(outs Int16Regs:$d), (ins Int8Regs:$a),
2585           !strconcat("cvt.", !strconcat(OpStr, !strconcat("16.",
2586             !strconcat(OpStr, "8 \t$d, $a;")))),
2587     [(set Int16Regs:$d, (OpNode Int8Regs:$a))]>;
2588  def ext8to32:
2589       NVPTXInst<(outs Int32Regs:$d), (ins Int8Regs:$a),
2590           !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
2591             !strconcat(OpStr, "8 \t$d, $a;")))),
2592     [(set Int32Regs:$d, (OpNode Int8Regs:$a))]>;
2593  def ext8to64:
2594       NVPTXInst<(outs Int64Regs:$d), (ins Int8Regs:$a),
2595           !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2596             !strconcat(OpStr, "8 \t$d, $a;")))),
2597     [(set Int64Regs:$d, (OpNode Int8Regs:$a))]>;
2598  def ext16to32:
2599       NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a),
2600           !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
2601             !strconcat(OpStr, "16 \t$d, $a;")))),
2602     [(set Int32Regs:$d, (OpNode Int16Regs:$a))]>;
2603  def ext16to64:
2604       NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$a),
2605           !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2606             !strconcat(OpStr, "16 \t$d, $a;")))),
2607     [(set Int64Regs:$d, (OpNode Int16Regs:$a))]>;
2608  def ext32to64:
2609       NVPTXInst<(outs Int64Regs:$d), (ins Int32Regs:$a),
2610           !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2611             !strconcat(OpStr, "32 \t$d, $a;")))),
2612     [(set Int64Regs:$d, (OpNode Int32Regs:$a))]>;
2613}
2614
2615defm Sint_extend_1 : INT_EXTEND_SIGNED_1<sext>;
2616defm Zint_extend_1 : INT_EXTEND_UNSIGNED_1<zext>;
2617defm Aint_extend_1 : INT_EXTEND_UNSIGNED_1<anyext>;
2618
2619defm Sint_extend : INT_EXTEND <"s", sext>;
2620defm Zint_extend : INT_EXTEND <"u", zext>;
2621defm Aint_extend : INT_EXTEND <"u", anyext>;
2622
2623class TRUNC_to1_asm<string sz> {
2624  string s = !strconcat("{{\n\t",
2625             !strconcat(".reg ",
2626             !strconcat(sz,
2627             !strconcat(" temp;\n\t",
2628             !strconcat("and",
2629             !strconcat(sz,
2630             !strconcat("\t temp, $a, 1;\n\t",
2631             !strconcat("setp",
2632             !strconcat(sz, ".eq \t $d, temp, 1;\n\t}}")))))))));
2633}
2634
2635def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
2636             "cvt.u32.u64 \t$d, $a;",
2637       [(set Int32Regs:$d, (trunc Int64Regs:$a))]>;
2638def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a),
2639             "cvt.u16.u64 \t$d, $a;",
2640       [(set Int16Regs:$d, (trunc Int64Regs:$a))]>;
2641def TRUNC_64to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int64Regs:$a),
2642             "cvt.u8.u64 \t$d, $a;",
2643       [(set Int8Regs:$d, (trunc Int64Regs:$a))]>;
2644def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a),
2645             "cvt.u16.u32 \t$d, $a;",
2646       [(set Int16Regs:$d, (trunc Int32Regs:$a))]>;
2647def TRUNC_32to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int32Regs:$a),
2648             "cvt.u8.u32 \t$d, $a;",
2649       [(set Int8Regs:$d, (trunc Int32Regs:$a))]>;
2650def TRUNC_16to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int16Regs:$a),
2651             "cvt.u8.u16 \t$d, $a;",
2652       [(set Int8Regs:$d, (trunc Int16Regs:$a))]>;
2653def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2654             TRUNC_to1_asm<".b64">.s,
2655             [(set Int1Regs:$d, (trunc Int64Regs:$a))]>;
2656def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2657             TRUNC_to1_asm<".b32">.s,
2658             [(set Int1Regs:$d, (trunc Int32Regs:$a))]>;
2659def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a),
2660             TRUNC_to1_asm<".b16">.s,
2661             [(set Int1Regs:$d, (trunc Int16Regs:$a))]>;
2662def TRUNC_8to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int8Regs:$a),
2663             TRUNC_to1_asm<".b16">.s,
2664             [(set Int1Regs:$d, (trunc Int8Regs:$a))]>;
2665
2666// Select instructions
2667def : Pat<(select Int32Regs:$pred, Int8Regs:$a, Int8Regs:$b),
2668          (SELECTi8rr Int8Regs:$a, Int8Regs:$b, (TRUNC_32to1 Int32Regs:$pred))>;
2669def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
2670          (SELECTi16rr Int16Regs:$a, Int16Regs:$b,
2671            (TRUNC_32to1 Int32Regs:$pred))>;
2672def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
2673          (SELECTi32rr Int32Regs:$a, Int32Regs:$b,
2674            (TRUNC_32to1 Int32Regs:$pred))>;
2675def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
2676          (SELECTi64rr Int64Regs:$a, Int64Regs:$b,
2677            (TRUNC_32to1 Int32Regs:$pred))>;
2678def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
2679          (SELECTf32rr Float32Regs:$a, Float32Regs:$b,
2680            (TRUNC_32to1 Int32Regs:$pred))>;
2681def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
2682          (SELECTf64rr Float64Regs:$a, Float64Regs:$b,
2683            (TRUNC_32to1 Int32Regs:$pred))>;
2684
2685class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
2686  NVPTXRegClass regclassOut> :
2687           NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
2688           !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")),
2689     [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
2690
2691def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
2692def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
2693def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
2694def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
2695
2696// pack a set of smaller int registers to a larger int register
2697def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d),
2698                          (ins Int8Regs:$s1, Int8Regs:$s2,
2699                               Int8Regs:$s3, Int8Regs:$s4),
2700                          !strconcat("{{\n\t.reg .b8\t%t<4>;",
2701                          !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
2702                          !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
2703                          !strconcat("\n\tcvt.u8.u8\t%t2, $s3;",
2704                          !strconcat("\n\tcvt.u8.u8\t%t3, $s4;",
2705                           "\n\tmov.b32\t$d, {%t0, %t1, %t2, %t3};\n\t}}"))))),
2706                          []>;
2707def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
2708                          (ins Int16Regs:$s1, Int16Regs:$s2,
2709                               Int16Regs:$s3, Int16Regs:$s4),
2710                          "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
2711                          []>;
2712def V2I8toI16 : NVPTXInst<(outs Int16Regs:$d),
2713                          (ins Int8Regs:$s1, Int8Regs:$s2),
2714                          !strconcat("{{\n\t.reg .b8\t%t<2>;",
2715                          !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
2716                          !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
2717                                     "\n\tmov.b16\t$d, {%t0, %t1};\n\t}}"))),
2718                          []>;
2719def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
2720                          (ins Int16Regs:$s1, Int16Regs:$s2),
2721                          "mov.b32\t$d, {{$s1, $s2}};",
2722                          []>;
2723def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
2724                          (ins Int32Regs:$s1, Int32Regs:$s2),
2725                          "mov.b64\t$d, {{$s1, $s2}};",
2726                          []>;
2727def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
2728                          (ins Float32Regs:$s1, Float32Regs:$s2),
2729                          "mov.b64\t$d, {{$s1, $s2}};",
2730                          []>;
2731
2732// unpack a larger int register to a set of smaller int registers
2733def I32toV4I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2,
2734                                Int8Regs:$d3, Int8Regs:$d4),
2735                          (ins Int32Regs:$s),
2736                          !strconcat("{{\n\t.reg .b8\t%t<4>;",
2737                          !strconcat("\n\tmov.b32\t{%t0, %t1, %t2, %t3}, $s;",
2738                          !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
2739                          !strconcat("\n\tcvt.u8.u8\t$d2, %t1;",
2740                          !strconcat("\n\tcvt.u8.u8\t$d3, %t2;",
2741                                     "\n\tcvt.u8.u8\t$d4, %t3;\n\t}}"))))),
2742                          []>;
2743def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
2744                                 Int16Regs:$d3, Int16Regs:$d4),
2745                           (ins Int64Regs:$s),
2746                           "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
2747                          []>;
2748def I16toV2I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2),
2749                          (ins Int16Regs:$s),
2750                          !strconcat("{{\n\t.reg .b8\t%t<2>;",
2751                          !strconcat("\n\tmov.b16\t{%t0, %t1}, $s;",
2752                          !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
2753                                     "\n\tcvt.u8.u8\t$d2, %t1;\n\t}}"))),
2754                          []>;
2755def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
2756                           (ins Int32Regs:$s),
2757                           "mov.b32\t{{$d1, $d2}}, $s;",
2758                          []>;
2759def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
2760                           (ins Int64Regs:$s),
2761                           "mov.b64\t{{$d1, $d2}}, $s;",
2762                          []>;
2763def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
2764                           (ins Float64Regs:$s),
2765                           "mov.b64\t{{$d1, $d2}}, $s;",
2766                          []>;
2767
2768def FPRound_ftz : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
2769            "cvt.rn.ftz.f32.f64 \t$d, $a;",
2770      [(set Float32Regs:$d, (fround Float64Regs:$a))]>, Requires<[doF32FTZ]>;
2771
2772def FPRound : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
2773            "cvt.rn.f32.f64 \t$d, $a;",
2774      [(set Float32Regs:$d, (fround Float64Regs:$a))]>;
2775
2776def FPExtend_ftz : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
2777            "cvt.ftz.f64.f32 \t$d, $a;",
2778      [(set Float64Regs:$d, (fextend Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2779
2780def FPExtend : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
2781            "cvt.f64.f32 \t$d, $a;",
2782      [(set Float64Regs:$d, (fextend Float32Regs:$a))]>;
2783
2784def retflag       : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
2785                           [SDNPHasChain, SDNPOptInGlue]>;
2786
2787//-----------------------------------
2788// Control-flow
2789//-----------------------------------
2790
2791let isTerminator=1 in {
2792   let isReturn=1, isBarrier=1 in
2793      def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;
2794
2795   let isBranch=1 in
2796      def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2797                          "@$a bra \t$target;",
2798                           [(brcond Int1Regs:$a, bb:$target)]>;
2799   let isBranch=1 in
2800      def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2801                          "@!$a bra \t$target;",
2802                           []>;
2803
2804   let isBranch=1, isBarrier=1 in
2805      def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
2806                        "bra.uni \t$target;",
2807                  [(br bb:$target)]>;
2808}
2809
2810def : Pat<(brcond Int32Regs:$a, bb:$target), (CBranch
2811    (ISetUNEi32ri_p Int32Regs:$a, 0), bb:$target)>;
2812
2813// SelectionDAGBuilder::visitSWitchCase() will invert the condition of a
2814// conditional branch if
2815// the target block is the next block so that the code can fall through to the
2816// target block.
2817// The invertion is done by 'xor condition, 1', which will be translated to
2818// (setne condition, -1).
2819// Since ptx supports '@!pred bra target', we should use it.
2820def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
2821  (CBranchOther Int1Regs:$a, bb:$target)>;
2822
2823// Call
2824def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
2825def SDT_NVPTXCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
2826                                        SDTCisVT<1, i32> ]>;
2827
2828def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
2829                           [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
2830def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_NVPTXCallSeqEnd,
2831                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
2832                           SDNPSideEffect]>;
2833
2834def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
2835def call          : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
2836                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
2837def calltarget : Operand<i32>;
2838let isCall=1 in {
2839   def CALL : NVPTXInst<(outs), (ins calltarget:$dst),
2840                  "call \t$dst, (1);", []>;
2841}
2842
2843def : Pat<(call tglobaladdr:$dst),
2844          (CALL tglobaladdr:$dst)>;
2845def : Pat<(call texternalsym:$dst),
2846          (CALL texternalsym:$dst)>;
2847
2848// Pseudo instructions.
2849class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
2850   : NVPTXInst<outs, ins, asmstr, pattern>;
2851
2852// @TODO: We use some tricks here to emit curly braces.  Can we clean this up
2853// a bit without TableGen modifications?
2854def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt),
2855  "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
2856                               [(callseq_start timm:$amt)]>;
2857def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
2858  "\n\t//{{\n\t}}// Callseq End $amt1",
2859                            [(callseq_end timm:$amt1, timm:$amt2)]>;
2860
2861// trap instruction
2862
2863def trapinst : NVPTXInst<(outs), (ins),
2864                         "trap;",
2865                         [(trap)]>;
2866
2867include "NVPTXIntrinsics.td"
2868
2869
2870//-----------------------------------
2871// Notes
2872//-----------------------------------
2873// BSWAP is currently expanded. The following is a more efficient
2874// - for < sm_20, use vector scalar mov, as tesla support native 16-bit register
2875// - for sm_20, use pmpt (use vector scalar mov to get the pack and
2876//   unpack). sm_20 supports native 32-bit register, but not native 16-bit
2877// register.
2878