//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the PTX instructions in TableGen format.
//
//===----------------------------------------------------------------------===//

include "NVPTXInstrFormats.td"

// A NOP instruction: no operands, an empty assembly string and no selection
// pattern.
let hasSideEffects = 0 in
def NOP : NVPTXInst<(outs), (ins), "", []>;

// List of vector specific properties
def isVecLD      : VecInstTypeEnum<1>;
def isVecST      : VecInstTypeEnum<2>;
def isVecBuild   : VecInstTypeEnum<3>;
def isVecShuffle : VecInstTypeEnum<4>;
def isVecExtract : VecInstTypeEnum<5>;
def isVecInsert  : VecInstTypeEnum<6>;
def isVecDest    : VecInstTypeEnum<7>;
def isVecOther   : VecInstTypeEnum<15>;

//===----------------------------------------------------------------------===//
// NVPTX Operand Definitions.
//===----------------------------------------------------------------------===//

def brtarget : Operand<OtherVT>;

// CVT conversion modes
// These must match the enum in NVPTX.h
//
// Base rounding modes occupy the low nibble.
def CvtNONE     : PatLeaf<(i32 0x0)>;
def CvtRNI      : PatLeaf<(i32 0x1)>;
def CvtRZI      : PatLeaf<(i32 0x2)>;
def CvtRMI      : PatLeaf<(i32 0x3)>;
def CvtRPI      : PatLeaf<(i32 0x4)>;
def CvtRN       : PatLeaf<(i32 0x5)>;
def CvtRZ       : PatLeaf<(i32 0x6)>;
def CvtRM       : PatLeaf<(i32 0x7)>;
def CvtRP       : PatLeaf<(i32 0x8)>;

// The _FTZ leaves are the corresponding base value plus 0x10.
def CvtNONE_FTZ : PatLeaf<(i32 0x10)>;
def CvtRNI_FTZ  : PatLeaf<(i32 0x11)>;
def CvtRZI_FTZ  : PatLeaf<(i32 0x12)>;
def CvtRMI_FTZ  : PatLeaf<(i32 0x13)>;
def CvtRPI_FTZ  : PatLeaf<(i32 0x14)>;
def CvtRN_FTZ   : PatLeaf<(i32 0x15)>;
def CvtRZ_FTZ   : PatLeaf<(i32 0x16)>;
def CvtRM_FTZ   : PatLeaf<(i32 0x17)>;
def CvtRP_FTZ   : PatLeaf<(i32 0x18)>;

// Saturating modes: 0x20, and 0x20 + 0x10 for the FTZ flavour.
def CvtSAT      : PatLeaf<(i32 0x20)>;
def CvtSAT_FTZ  : PatLeaf<(i32 0x30)>;

// Immediate operand carrying one of the Cvt* values above; it is printed by
// the "printCvtMode" method.
def CvtMode : Operand<i32> {
  let PrintMethod = "printCvtMode";
}

// Compare modes
// These must match the enum in NVPTX.h
def CmpEQ  : PatLeaf<(i32 0)>;
def CmpNE  : PatLeaf<(i32 1)>;
def CmpLT  : PatLeaf<(i32 2)>;
def CmpLE  : PatLeaf<(i32 3)>;
def CmpGT  : PatLeaf<(i32 4)>;
def CmpGE  : PatLeaf<(i32 5)>;
def CmpLO  : PatLeaf<(i32 6)>;
def CmpLS  : PatLeaf<(i32 7)>;
def CmpHI  : PatLeaf<(i32 8)>;
def CmpHS  : PatLeaf<(i32 9)>;
def CmpEQU : PatLeaf<(i32 10)>;
def CmpNEU : PatLeaf<(i32 11)>;
def CmpLTU : PatLeaf<(i32 12)>;
def CmpLEU : PatLeaf<(i32 13)>;
def CmpGTU : PatLeaf<(i32 14)>;
def CmpGEU : PatLeaf<(i32 15)>;
def CmpNUM : PatLeaf<(i32 16)>;
def CmpNAN : PatLeaf<(i32 17)>;

// The _FTZ leaves are the corresponding base value plus 0x100.
def CmpEQ_FTZ  : PatLeaf<(i32 0x100)>;
def CmpNE_FTZ  : PatLeaf<(i32 0x101)>;
def CmpLT_FTZ  : PatLeaf<(i32 0x102)>;
def CmpLE_FTZ  : PatLeaf<(i32 0x103)>;
def CmpGT_FTZ  : PatLeaf<(i32 0x104)>;
def CmpGE_FTZ  : PatLeaf<(i32 0x105)>;
def CmpLO_FTZ  : PatLeaf<(i32 0x106)>;
def CmpLS_FTZ  : PatLeaf<(i32 0x107)>;
def CmpHI_FTZ  : PatLeaf<(i32 0x108)>;
def CmpHS_FTZ  : PatLeaf<(i32 0x109)>;
def CmpEQU_FTZ : PatLeaf<(i32 0x10A)>;
def CmpNEU_FTZ : PatLeaf<(i32 0x10B)>;
def CmpLTU_FTZ : PatLeaf<(i32 0x10C)>;
def CmpLEU_FTZ : PatLeaf<(i32 0x10D)>;
def CmpGTU_FTZ : PatLeaf<(i32 0x10E)>;
def CmpGEU_FTZ : PatLeaf<(i32 0x10F)>;
def CmpNUM_FTZ : PatLeaf<(i32 0x110)>;
def CmpNAN_FTZ : PatLeaf<(i32 0x111)>;

// Immediate operand carrying one of the Cmp* values; it is printed by the
// "printCmpMode" method.
def CmpMode : Operand<i32> {
  let PrintMethod = "printCmpMode";
}

// f32 operands/leaves whose node transform ignores the matched immediate and
// always produces the fixed target constant 0.0 (resp. 1.0).
//
// The transforms pass SDLoc(N) explicitly, exactly as the SHL2MUL32/SHL2MUL16
// transforms in this file do for getTargetConstant: the SelectionDAG constant
// builders take the debug location as their second argument, so the previous
// two-argument call would fail to compile as soon as one of these transforms
// was referenced by a pattern.
def F32ConstZero : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
    return CurDAG->getTargetConstantFP(0.0, SDLoc(N), MVT::f32);
  }]>;
def F32ConstOne : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
    return CurDAG->getTargetConstantFP(1.0, SDLoc(N), MVT::f32);
  }]>;

//===----------------------------------------------------------------------===//
// NVPTX Instruction Predicate Definitions
//===----------------------------------------------------------------------===//


// Each predicate wraps the C++ condition string given as its argument.
def hasAtomRedG32 : Predicate<"Subtarget->hasAtomRedG32()">;
def hasAtomRedS32 : Predicate<"Subtarget->hasAtomRedS32()">;
def hasAtomRedGen32 : Predicate<"Subtarget->hasAtomRedGen32()">;
def useAtomRedG32forGen32 :
  Predicate<"!Subtarget->hasAtomRedGen32() && Subtarget->hasAtomRedG32()">;
def hasBrkPt : Predicate<"Subtarget->hasBrkPt()">;
def hasAtomRedG64 : Predicate<"Subtarget->hasAtomRedG64()">;
def hasAtomRedS64 : Predicate<"Subtarget->hasAtomRedS64()">;
def hasAtomRedGen64 : Predicate<"Subtarget->hasAtomRedGen64()">;
def useAtomRedG64forGen64 :
  Predicate<"!Subtarget->hasAtomRedGen64() && Subtarget->hasAtomRedG64()">;
def hasAtomAddF32 : Predicate<"Subtarget->hasAtomAddF32()">;
def hasVote : Predicate<"Subtarget->hasVote()">;
def hasDouble : Predicate<"Subtarget->hasDouble()">;
def reqPTX20 : Predicate<"Subtarget->reqPTX20()">;
def hasLDG : Predicate<"Subtarget->hasLDG()">;
def hasLDU : Predicate<"Subtarget->hasLDU()">;
def hasGenericLdSt    : Predicate<"Subtarget->hasGenericLdSt()">;

// Flush-to-zero selection for f32 operations, and its negation.
def doF32FTZ          : Predicate<"useF32FTZ()">;
def doNoF32FTZ        : Predicate<"!useF32FTZ()">;

def doMulWide         : Predicate<"doMulWide">;

// FMA contraction allowed / disallowed.
def allowFMA          : Predicate<"allowFMA()">;
def noFMA             : Predicate<"!allowFMA()">;

// f32 division level: 0 selects the approximate forms, 1 the "full" forms.
def do_DIVF32_APPROX  : Predicate<"getDivF32Level()==0">;
def do_DIVF32_FULL    : Predicate<"getDivF32Level()==1">;

def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
def do_SQRTF32_RN     : Predicate<"usePrecSqrtF32()">;

def hasHWROT32        : Predicate<"Subtarget->hasHWROT32()">;
def noHWROT32         : Predicate<"!Subtarget->hasHWROT32()">;

// Always-true predicate, for templates that require a Predicate argument.
def true              : Predicate<"1">;

def hasPTX31          : Predicate<"Subtarget->getPTXVersion() >= 31">;


//===----------------------------------------------------------------------===//
// Some Common Instruction Class Templates
//===----------------------------------------------------------------------===//

// Template for instructions which take three int64, int32, or int16 args.
// The instructions are named "<OpcStr><Width>" (e.g. "add.s64").
multiclass I3<string OpcStr, SDNode OpNode> {
  // For every width there is a register/register ("rr") and a
  // register/immediate ("ri") form; the width is appended to OpcStr.
  def i64rr :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
              OpcStr # "64 \t$dst, $a, $b;",
              [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
  def i64ri :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
              OpcStr # "64 \t$dst, $a, $b;",
              [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
  def i32rr :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
              OpcStr # "32 \t$dst, $a, $b;",
              [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def i32ri :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
              OpcStr # "32 \t$dst, $a, $b;",
              [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
  def i16rr :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
              OpcStr # "16 \t$dst, $a, $b;",
              [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
  def i16ri :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
              OpcStr # "16 \t$dst, $a, $b;",
              [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
}

// Template for instructions which take 3 int32 args.  The instructions are
// named "<OpcStr>.s32" (e.g. "addc.cc.s32").
multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
  def i32rr :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
              OpcStr # ".s32 \t$dst, $a, $b;",
              [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def i32ri :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
              OpcStr # ".s32 \t$dst, $a, $b;",
              [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
}

// Template for instructions which take three fp64 or fp32 args.  The
// instructions are named "<OpcStr>.f<Width>" (e.g. "add.f64").
//
// Also defines ftz (flush subnormal inputs and results to sign-preserving
// zero) variants for fp32 functions.
//
// Every record here requires allowFMA; the F3_rn template provides the
// matching records for the noFMA case.
multiclass F3<string OpcStr, SDNode OpNode> {
  // fp64 forms.
  def f64rr :
    NVPTXInst<(outs Float64Regs:$dst),
              (ins Float64Regs:$a, Float64Regs:$b),
              OpcStr # ".f64 \t$dst, $a, $b;",
              [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>,
    Requires<[allowFMA]>;
  def f64ri :
    NVPTXInst<(outs Float64Regs:$dst),
              (ins Float64Regs:$a, f64imm:$b),
              OpcStr # ".f64 \t$dst, $a, $b;",
              [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>,
    Requires<[allowFMA]>;

  // fp32 forms with ".ftz"; additionally gated on doF32FTZ.
  def f32rr_ftz :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b),
              OpcStr # ".ftz.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[allowFMA, doF32FTZ]>;
  def f32ri_ftz :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b),
              OpcStr # ".ftz.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
    Requires<[allowFMA, doF32FTZ]>;

  // Plain fp32 forms.
  def f32rr :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b),
              OpcStr # ".f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[allowFMA]>;
  def f32ri :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b),
              OpcStr # ".f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
    Requires<[allowFMA]>;
}

// Same as F3, but defines ".rn" variants (round to nearest even).
multiclass F3_rn<string OpcStr, SDNode OpNode> {
  // fp64 forms.  Every record in this template requires noFMA.
  def f64rr :
    NVPTXInst<(outs Float64Regs:$dst),
              (ins Float64Regs:$a, Float64Regs:$b),
              OpcStr # ".rn.f64 \t$dst, $a, $b;",
              [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>,
    Requires<[noFMA]>;
  def f64ri :
    NVPTXInst<(outs Float64Regs:$dst),
              (ins Float64Regs:$a, f64imm:$b),
              OpcStr # ".rn.f64 \t$dst, $a, $b;",
              [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>,
    Requires<[noFMA]>;

  // fp32 forms with ".ftz"; additionally gated on doF32FTZ.
  def f32rr_ftz :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b),
              OpcStr # ".rn.ftz.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[noFMA, doF32FTZ]>;
  def f32ri_ftz :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b),
              OpcStr # ".rn.ftz.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
    Requires<[noFMA, doF32FTZ]>;

  // Plain fp32 forms.
  def f32rr :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b),
              OpcStr # ".rn.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
    Requires<[noFMA]>;
  def f32ri :
    NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b),
              OpcStr # ".rn.f32 \t$dst, $a, $b;",
              [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
    Requires<[noFMA]>;
}

// Template for operations which take two f32 or f64 operands.  Provides three
// instructions: <OpcStr>.f64, <OpcStr>.f32, and <OpcStr>.ftz.f32 (flush
// subnormal inputs and results to zero).
multiclass F2<string OpcStr, SDNode OpNode> {
  def f64 :
    NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
              OpcStr # ".f64 \t$dst, $a;",
              [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
  // The ftz form is only available when doF32FTZ holds.
  def f32_ftz :
    NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
              OpcStr # ".ftz.f32 \t$dst, $a;",
              [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
    Requires<[doF32FTZ]>;
  def f32 :
    NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
              OpcStr # ".f32 \t$dst, $a;",
              [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
}

//===----------------------------------------------------------------------===//
// NVPTX Instructions.
//===----------------------------------------------------------------------===//

//-----------------------------------
// Type Conversion
//-----------------------------------

let hasSideEffects = 0 in {
  // Generate a cvt to the given type from all possible types.  Each instance
  // takes a CvtMode immediate that defines the conversion mode to use.  It can
  // be CvtNONE to omit a conversion mode.
  //
  // NOTE(review): despite its name, FromName is printed as the first type
  // suffix and RC is the register class of $dst, so it appears to name the
  // destination type; the second suffix (and the $src register class) is what
  // varies per record.  None of the records carries a selection pattern.
  multiclass CVT_FROM_ALL<string FromName, RegisterClass RC> {
    // 8- and 16-bit sources all live in Int16Regs.
    def _s8 :
      NVPTXInst<(outs RC:$dst),
                (ins Int16Regs:$src, CvtMode:$mode),
                "cvt${mode:base}${mode:ftz}${mode:sat}." # FromName #
                ".s8\t$dst, $src;", []>;
    def _u8 :
      NVPTXInst<(outs RC:$dst),
                (ins Int16Regs:$src, CvtMode:$mode),
                "cvt${mode:base}${mode:ftz}${mode:sat}." # FromName #
                ".u8\t$dst, $src;", []>;
    def _s16 :
      NVPTXInst<(outs RC:$dst),
                (ins Int16Regs:$src, CvtMode:$mode),
                "cvt${mode:base}${mode:ftz}${mode:sat}." # FromName #
                ".s16\t$dst, $src;", []>;
    def _u16 :
      NVPTXInst<(outs RC:$dst),
                (ins Int16Regs:$src, CvtMode:$mode),
                "cvt${mode:base}${mode:ftz}${mode:sat}." # FromName #
                ".u16\t$dst, $src;", []>;
    def _f16 :
      NVPTXInst<(outs RC:$dst),
                (ins Int16Regs:$src, CvtMode:$mode),
                "cvt${mode:base}${mode:ftz}${mode:sat}." # FromName #
                ".f16\t$dst, $src;", []>;

    // 32-bit integer sources.
    def _s32 :
      NVPTXInst<(outs RC:$dst),
                (ins Int32Regs:$src, CvtMode:$mode),
                "cvt${mode:base}${mode:ftz}${mode:sat}." # FromName #
                ".s32\t$dst, $src;", []>;
    def _u32 :
      NVPTXInst<(outs RC:$dst),
                (ins Int32Regs:$src, CvtMode:$mode),
                "cvt${mode:base}${mode:ftz}${mode:sat}." # FromName #
                ".u32\t$dst, $src;", []>;

    // 64-bit integer sources.
    def _s64 :
      NVPTXInst<(outs RC:$dst),
                (ins Int64Regs:$src, CvtMode:$mode),
                "cvt${mode:base}${mode:ftz}${mode:sat}." # FromName #
                ".s64\t$dst, $src;", []>;
    def _u64 :
      NVPTXInst<(outs RC:$dst),
                (ins Int64Regs:$src, CvtMode:$mode),
                "cvt${mode:base}${mode:ftz}${mode:sat}." # FromName #
                ".u64\t$dst, $src;", []>;

    // Floating-point sources.
    def _f32 :
      NVPTXInst<(outs RC:$dst),
                (ins Float32Regs:$src, CvtMode:$mode),
                "cvt${mode:base}${mode:ftz}${mode:sat}." # FromName #
                ".f32\t$dst, $src;", []>;
    def _f64 :
      NVPTXInst<(outs RC:$dst),
                (ins Float64Regs:$src, CvtMode:$mode),
                "cvt${mode:base}${mode:ftz}${mode:sat}." # FromName #
                ".f64\t$dst, $src;", []>;
  }

  // Generate cvts from all types to all types.
  defm CVT_s8  : CVT_FROM_ALL<"s8",  Int16Regs>;
  defm CVT_u8  : CVT_FROM_ALL<"u8",  Int16Regs>;
  defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
  defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
  defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
  defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>;
  defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>;
  defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>;
  defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
  defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
  defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;

  // These cvts are different from those above: The source and dest registers
  // are of the same type.
  def CVT_INREG_s16_s8 :
    NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
              "cvt.s16.s8 \t$dst, $src;", []>;
  def CVT_INREG_s32_s8 :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
              "cvt.s32.s8 \t$dst, $src;", []>;
  def CVT_INREG_s32_s16 :
    NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
              "cvt.s32.s16 \t$dst, $src;", []>;
  def CVT_INREG_s64_s8 :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
              "cvt.s64.s8 \t$dst, $src;", []>;
  def CVT_INREG_s64_s16 :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
              "cvt.s64.s16 \t$dst, $src;", []>;
  def CVT_INREG_s64_s32 :
    NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
              "cvt.s64.s32 \t$dst, $src;", []>;
}

//-----------------------------------
// Integer Arithmetic
//-----------------------------------

// Template for xor masquerading as int1 arithmetic.
multiclass ADD_SUB_i1<SDNode OpNode> {
  // Both forms print "xor.pred"; only the matched OpNode differs per instance.
  def _rr :
    NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
              "xor.pred \t$dst, $a, $b;",
              [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
  def _ri :
    NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
              "xor.pred \t$dst, $a, $b;",
              [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
}

// int1 addition and subtraction are both just xor.
defm ADD_i1 : ADD_SUB_i1<add>;
defm SUB_i1 : ADD_SUB_i1<sub>;

// int16, int32, and int64 signed addition.  Since nvptx is 2's complement, we
// also use these for unsigned arithmetic.
defm ADD : I3<"add.s", add>;
defm SUB : I3<"sub.s", sub>;

// int32 addition and subtraction with carry-out.
// FIXME: PTX 4.3 adds a 64-bit add.cc (and maybe also 64-bit addc.cc?).
defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;

// int32 addition and subtraction with carry-in and carry-out.
defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;

// Low half of the product.
defm MULT : I3<"mul.lo.s", mul>;

// High half of the product, signed and unsigned.
defm MULTHS : I3<"mul.hi.s", mulhs>;
defm MULTHU : I3<"mul.hi.u", mulhu>;

defm SDIV : I3<"div.s", sdiv>;
defm UDIV : I3<"div.u", udiv>;

// The ri versions of rem.s and rem.u won't be selected; DAGCombiner::visitSREM
// will lower it.
defm SREM : I3<"rem.s", srem>;
defm UREM : I3<"rem.u", urem>;


//
// Wide multiplication
//
// None of these instructions has a selection pattern of its own; they are
// selected through the explicit Pat<> records that reference them.  For each
// flavour there is a register form, an immediate form of the source width, and
// an immediate form of the result width.

// Signed 32 x 32 -> 64.
def MULWIDES64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
            "mul.wide.s32 \t$dst, $a, $b;", []>;
def MULWIDES64Imm :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
            "mul.wide.s32 \t$dst, $a, $b;", []>;
def MULWIDES64Imm64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
            "mul.wide.s32 \t$dst, $a, $b;", []>;

// Unsigned 32 x 32 -> 64.
def MULWIDEU64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
            "mul.wide.u32 \t$dst, $a, $b;", []>;
def MULWIDEU64Imm :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
            "mul.wide.u32 \t$dst, $a, $b;", []>;
def MULWIDEU64Imm64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
            "mul.wide.u32 \t$dst, $a, $b;", []>;

// Signed 16 x 16 -> 32.
def MULWIDES32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
            "mul.wide.s16 \t$dst, $a, $b;", []>;
def MULWIDES32Imm :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
            "mul.wide.s16 \t$dst, $a, $b;", []>;
def MULWIDES32Imm32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
            "mul.wide.s16 \t$dst, $a, $b;", []>;

// Unsigned 16 x 16 -> 32.
def MULWIDEU32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
            "mul.wide.u16 \t$dst, $a, $b;", []>;
def MULWIDEU32Imm :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
            "mul.wide.u16 \t$dst, $a, $b;", []>;
def MULWIDEU32Imm32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
            "mul.wide.u16 \t$dst, $a, $b;", []>;

// Target nodes for widening multiplies: one result, two operands, and the two
// operands are constrained to have the same type.
def SDTMulWide : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
def mul_wide_signed   : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;
def mul_wide_unsigned : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;

// Matchers for signed, unsigned mul.wide ISD nodes.
// All of the patterns in this group are gated on doMulWide.
let Predicates = [doMulWide] in {
  // 16 x 16 -> 32.
  def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)),
            (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>;
  def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)),
            (MULWIDES32Imm Int16Regs:$a, imm:$b)>;
  def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)),
            (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>;
  def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),
            (MULWIDEU32Imm Int16Regs:$a, imm:$b)>;

  // 32 x 32 -> 64.
  def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)),
            (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>;
  def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)),
            (MULWIDES64Imm Int32Regs:$a, imm:$b)>;
  def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)),
            (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>;
  def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)),
            (MULWIDEU64Imm Int32Regs:$a, imm:$b)>;
}

// Predicates used for converting some patterns to mul.wide.

// Immediate that fits in 32 bits as a signed value.
def SInt32Const : PatLeaf<(imm), [{
  return N->getAPIntValue().isSignedIntN(32);
}]>;

// Immediate that fits in 32 bits as an unsigned value.
def UInt32Const : PatLeaf<(imm), [{
  return N->getAPIntValue().isIntN(32);
}]>;

// Immediate that fits in 16 bits as a signed value.
def SInt16Const : PatLeaf<(imm), [{
  return N->getAPIntValue().isSignedIntN(16);
}]>;

// Immediate that fits in 16 bits as an unsigned value.
def UInt16Const : PatLeaf<(imm), [{
  return N->getAPIntValue().isIntN(16);
}]>;

def Int5Const : PatLeaf<(imm), [{
  // Check if 0 <= v < 32; only then will the result of (x << v) be an int32.
  const APInt &ShAmt = N->getAPIntValue();
  return ShAmt.sge(0) && ShAmt.slt(32);
}]>;

def Int4Const : PatLeaf<(imm), [{
  // Check if 0 <= v < 16; only then will the result of (x << v) be an int16.
  const APInt &ShAmt = N->getAPIntValue();
  return ShAmt.sge(0) && ShAmt.slt(16);
}]>;

// Turn a shift amount v into the multiplier (1 << v), as a 32-bit constant.
def SHL2MUL32 : SDNodeXForm<imm, [{
  APInt One(32, 1);
  return CurDAG->getTargetConstant(One.shl(N->getAPIntValue()), SDLoc(N),
                                   MVT::i32);
}]>;

// Turn a shift amount v into the multiplier (1 << v), as a 16-bit constant.
def SHL2MUL16 : SDNodeXForm<imm, [{
  APInt One(16, 1);
  return CurDAG->getTargetConstant(One.shl(N->getAPIntValue()), SDLoc(N),
                                   MVT::i16);
}]>;

// Convert "sign/zero-extend, then shift left by an immediate" to mul.wide.
let Predicates = [doMulWide] in {
  def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
            (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>;
  def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
            (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>;

  def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
            (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>;
  def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
            (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>;
}

// Convert "sign/zero-extend then multiply" to mul.wide.
// Multiplies of two extended values, or of an extended value and a constant
// that fits the narrow type, become mul.wide.  All gated on doMulWide.
let Predicates = [doMulWide] in {
  def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
            (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>;
  def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
            (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>;

  def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
            (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>;
  def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
            (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>;

  def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
            (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>;
  def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
            (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>;

  def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
            (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>;
  def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
            (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>;
}

//
// Integer multiply-add
//
// One result and three operands, all integers of the same type.
def SDTIMAD :
  SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>,
                       SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>;
def imad : SDNode<"NVPTXISD::IMAD", SDTIMAD>;

// The suffix letters describe operands $b and $c: 'r' is a register, 'i' an
// immediate.  Operand $a is always a register.

// 16-bit mad.lo.
def MAD16rrr :
  NVPTXInst<(outs Int16Regs:$dst),
            (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
            "mad.lo.s16 \t$dst, $a, $b, $c;",
            [(set Int16Regs:$dst,
                  (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>;
def MAD16rri :
  NVPTXInst<(outs Int16Regs:$dst),
            (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
            "mad.lo.s16 \t$dst, $a, $b, $c;",
            [(set Int16Regs:$dst,
                  (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>;
def MAD16rir :
  NVPTXInst<(outs Int16Regs:$dst),
            (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
            "mad.lo.s16 \t$dst, $a, $b, $c;",
            [(set Int16Regs:$dst,
                  (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>;
def MAD16rii :
  NVPTXInst<(outs Int16Regs:$dst),
            (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
            "mad.lo.s16 \t$dst, $a, $b, $c;",
            [(set Int16Regs:$dst,
                  (imad Int16Regs:$a, imm:$b, imm:$c))]>;

// 32-bit mad.lo.
def MAD32rrr :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
            "mad.lo.s32 \t$dst, $a, $b, $c;",
            [(set Int32Regs:$dst,
                  (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>;
def MAD32rri :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
            "mad.lo.s32 \t$dst, $a, $b, $c;",
            [(set Int32Regs:$dst,
                  (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>;
def MAD32rir :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
            "mad.lo.s32 \t$dst, $a, $b, $c;",
            [(set Int32Regs:$dst,
                  (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>;
def MAD32rii :
  NVPTXInst<(outs Int32Regs:$dst),
            (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
            "mad.lo.s32 \t$dst, $a, $b, $c;",
            [(set Int32Regs:$dst,
                  (imad Int32Regs:$a, imm:$b, imm:$c))]>;

// 64-bit mad.lo.
def MAD64rrr :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
            "mad.lo.s64 \t$dst, $a, $b, $c;",
            [(set Int64Regs:$dst,
                  (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>;
def MAD64rri :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
            "mad.lo.s64 \t$dst, $a, $b, $c;",
            [(set Int64Regs:$dst,
                  (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>;
def MAD64rir :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
            "mad.lo.s64 \t$dst, $a, $b, $c;",
            [(set Int64Regs:$dst,
                  (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>;
def MAD64rii :
  NVPTXInst<(outs Int64Regs:$dst),
            (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
            "mad.lo.s64 \t$dst, $a, $b, $c;",
            [(set Int64Regs:$dst,
                  (imad Int64Regs:$a, imm:$b, imm:$c))]>;

// Integer negation.
def INEG16 :
  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
            "neg.s16 \t$dst, $src;",
            [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
def INEG32 :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
            "neg.s32 \t$dst, $src;",
            [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
def INEG64 :
  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
            "neg.s64 \t$dst, $src;",
            [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;

//-----------------------------------
// Floating Point Arithmetic
//-----------------------------------

// Constant 1.0f
def FloatConst1 : PatLeaf<(fpimm), [{
  const llvm::APFloat &Val = N->getValueAPF();
  return &Val.getSemantics() == &llvm::APFloat::IEEEsingle &&
         Val.convertToFloat() == 1.0f;
}]>;
// Constant 1.0 (double)
def DoubleConst1 : PatLeaf<(fpimm), [{
  const llvm::APFloat &Val = N->getValueAPF();
  return &Val.getSemantics() == &llvm::APFloat::IEEEdouble &&
         Val.convertToDouble() == 1.0;
}]>;

// Records selectable when allowFMA holds.
defm FADD : F3<"add", fadd>;
defm FSUB : F3<"sub", fsub>;
defm FMUL : F3<"mul", fmul>;

// ".rn" records selectable when noFMA holds.
defm FADD_rn : F3_rn<"add", fadd>;
defm FSUB_rn : F3_rn<"sub", fsub>;
defm FMUL_rn : F3_rn<"mul", fmul>;

defm FABS  : F2<"abs", fabs>;
defm FNEG  : F2<"neg", fneg>;
defm FSQRT : F2<"sqrt.rn", fsqrt>;

//
// F64 division
//
// 1.0 / b is printed as a reciprocal; the constant operand $a is matched but
// does not appear in the assembly string.
def FDIV641r :
  NVPTXInst<(outs Float64Regs:$dst),
            (ins f64imm:$a, Float64Regs:$b),
            "rcp.rn.f64 \t$dst, $b;",
            [(set Float64Regs:$dst, (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
def FDIV64rr :
  NVPTXInst<(outs Float64Regs:$dst),
            (ins Float64Regs:$a, Float64Regs:$b),
            "div.rn.f64 \t$dst, $a, $b;",
            [(set Float64Regs:$dst, (fdiv Float64Regs:$a, Float64Regs:$b))]>;
def FDIV64ri :
  NVPTXInst<(outs Float64Regs:$dst),
            (ins Float64Regs:$a, f64imm:$b),
            "div.rn.f64 \t$dst, $a, $b;",
            [(set Float64Regs:$dst, (fdiv Float64Regs:$a, fpimm:$b))]>;

//
// F32 Approximate reciprocal
//
def FDIV321r_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins f32imm:$a, Float32Regs:$b),
            "rcp.approx.ftz.f32 \t$dst, $b;",
            [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
  Requires<[do_DIVF32_APPROX, doF32FTZ]>;
def FDIV321r :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins f32imm:$a, Float32Regs:$b),
            "rcp.approx.f32 \t$dst, $b;",
            [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
  Requires<[do_DIVF32_APPROX]>;
//
// F32 Approximate division
//
def FDIV32approxrr_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, Float32Regs:$b),
            "div.approx.ftz.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
  Requires<[do_DIVF32_APPROX, doF32FTZ]>;
def FDIV32approxri_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, f32imm:$b),
            "div.approx.ftz.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
  Requires<[do_DIVF32_APPROX, doF32FTZ]>;
def FDIV32approxrr :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, Float32Regs:$b),
            "div.approx.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
  Requires<[do_DIVF32_APPROX]>;
def FDIV32approxri :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, f32imm:$b),
            "div.approx.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
  Requires<[do_DIVF32_APPROX]>;
//
// F32 Semi-accurate reciprocal
//
// rcp.approx gives the same result as div.full(1.0f, a) and is faster.
//
// The constant operand $a is matched but not printed.
def FDIV321r_approx_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins f32imm:$a, Float32Regs:$b),
            "rcp.approx.ftz.f32 \t$dst, $b;",
            [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
  Requires<[do_DIVF32_FULL, doF32FTZ]>;
def FDIV321r_approx :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins f32imm:$a, Float32Regs:$b),
            "rcp.approx.f32 \t$dst, $b;",
            [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
  Requires<[do_DIVF32_FULL]>;
//
// F32 Semi-accurate division
//
def FDIV32rr_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, Float32Regs:$b),
            "div.full.ftz.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
  Requires<[do_DIVF32_FULL, doF32FTZ]>;
def FDIV32ri_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, f32imm:$b),
            "div.full.ftz.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
  Requires<[do_DIVF32_FULL, doF32FTZ]>;
def FDIV32rr :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, Float32Regs:$b),
            "div.full.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
  Requires<[do_DIVF32_FULL]>;
def FDIV32ri :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, f32imm:$b),
            "div.full.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
  Requires<[do_DIVF32_FULL]>;
//
// F32 Accurate reciprocal
//
def FDIV321r_prec_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins f32imm:$a, Float32Regs:$b),
            "rcp.rn.ftz.f32 \t$dst, $b;",
            [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
  Requires<[reqPTX20, doF32FTZ]>;
def FDIV321r_prec :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins f32imm:$a, Float32Regs:$b),
            "rcp.rn.f32 \t$dst, $b;",
            [(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
  Requires<[reqPTX20]>;
//
// F32 Accurate division
//
def FDIV32rr_prec_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, Float32Regs:$b),
            "div.rn.ftz.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
  Requires<[doF32FTZ, reqPTX20]>;
def FDIV32ri_prec_ftz :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, f32imm:$b),
            "div.rn.ftz.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
  Requires<[doF32FTZ, reqPTX20]>;
def FDIV32rr_prec :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, Float32Regs:$b),
            "div.rn.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
  Requires<[reqPTX20]>;
def FDIV32ri_prec :
  NVPTXInst<(outs Float32Regs:$dst),
            (ins Float32Regs:$a, f32imm:$b),
            "div.rn.f32 \t$dst, $a, $b;",
            [(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
  Requires<[reqPTX20]>;

//
// F32 rsqrt
//

// No selection pattern of its own; it is selected through an explicit Pat<>.
def RSQRTF32approx1r :
  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$b),
            "rsqrt.approx.f32 \t$dst, $b;", []>;

// Convert 1.0f/sqrt(x) to rsqrt.approx.f32.  (There is an rsqrt.approx.f64, but
// it's emulated in software.)
def : Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)),
          (RSQRTF32approx1r Float32Regs:$b)>,
      Requires<[do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ]>;

// Fused multiply-add.  Emits four records per instantiation, suffixed by the
// kinds of the three source operands (r = register of class RC, i = immediate
// of class ImmCls).  The first source is always a register.  Every record is
// gated on Pred.
multiclass FMA<string OpcStr, RegisterClass RC, Operand ImmCls, Predicate Pred> {
  def rrr
      : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c),
                  !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                  [(set RC:$dst, (fma RC:$a, RC:$b, RC:$c))]>,
        Requires<[Pred]>;
  def rri
      : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, ImmCls:$c),
                  !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                  [(set RC:$dst, (fma RC:$a, RC:$b, fpimm:$c))]>,
        Requires<[Pred]>;
  def rir
      : NVPTXInst<(outs RC:$dst), (ins RC:$a, ImmCls:$b, RC:$c),
                  !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                  [(set RC:$dst, (fma RC:$a, fpimm:$b, RC:$c))]>,
        Requires<[Pred]>;
  def rii
      : NVPTXInst<(outs RC:$dst), (ins RC:$a, ImmCls:$b, ImmCls:$c),
                  !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                  [(set RC:$dst, (fma RC:$a, fpimm:$b, fpimm:$c))]>,
        Requires<[Pred]>;
}

defm FMA32_ftz : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>;
defm FMA32     : FMA<"fma.rn.f32", Float32Regs, f32imm, true>;
defm FMA64     : FMA<"fma.rn.f64", Float64Regs, f64imm, true>;

// sin/cos
def SINF
    : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
                "sin.approx.f32 \t$dst, $src;",
                [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
def COSF
    : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
                "cos.approx.f32 \t$dst, $src;",
                [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;

// Lower (frem x, y) into (sub x, (mul (trunc (div x, y)) y)),
// i.e. "poor man's fmod()"
//
// The quotient is rounded toward zero (CvtRZI), not toward negative infinity
// (CvtRMI).  frem has fmod semantics: the result takes the sign of the
// dividend.  Flooring the quotient gives the wrong answer whenever x and y
// have opposite signs, e.g. frem(-5.5, 2.0) must be -1.5, but the floor-based
// expansion yields 0.5.

// frem - f32 FTZ
def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
          (FSUBf32rr_ftz Float32Regs:$x, (FMULf32rr_ftz (CVT_f32_f32
            (FDIV32rr_prec_ftz Float32Regs:$x, Float32Regs:$y), CvtRZI_FTZ),
             Float32Regs:$y))>,
      Requires<[doF32FTZ]>;
def : Pat<(frem Float32Regs:$x, fpimm:$y),
          (FSUBf32rr_ftz Float32Regs:$x, (FMULf32ri_ftz (CVT_f32_f32
            (FDIV32ri_prec_ftz Float32Regs:$x, fpimm:$y), CvtRZI_FTZ),
             fpimm:$y))>,
      Requires<[doF32FTZ]>;

// frem - f32
def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
          (FSUBf32rr Float32Regs:$x, (FMULf32rr (CVT_f32_f32
            (FDIV32rr_prec Float32Regs:$x, Float32Regs:$y), CvtRZI),
             Float32Regs:$y))>;
def : Pat<(frem Float32Regs:$x, fpimm:$y),
          (FSUBf32rr Float32Regs:$x, (FMULf32ri (CVT_f32_f32
            (FDIV32ri_prec Float32Regs:$x, fpimm:$y), CvtRZI),
             fpimm:$y))>;

// frem - f64
def : Pat<(frem Float64Regs:$x, Float64Regs:$y),
          (FSUBf64rr Float64Regs:$x, (FMULf64rr (CVT_f64_f64
            (FDIV64rr Float64Regs:$x, Float64Regs:$y), CvtRZI),
             Float64Regs:$y))>;
def : Pat<(frem Float64Regs:$x, fpimm:$y),
          (FSUBf64rr Float64Regs:$x, (FMULf64ri (CVT_f64_f64
            (FDIV64ri Float64Regs:$x, fpimm:$y), CvtRZI),
             fpimm:$y))>;

//-----------------------------------
// Bitwise operations
//-----------------------------------

// Template for three-arg bitwise operations.  Takes three args, Creates .b16,
// .b32, .b64, and .pred (predicate registers -- i.e., i1) versions of OpcStr.
// For each width (pred/b16/b32/b64) this emits a reg/reg record (suffix rr)
// and a reg/imm record (suffix ri), each selecting OpNode directly.
multiclass BITWISE<string OpcStr, SDNode OpNode> {
  // i1 (predicate registers)
  def b1rr
      : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
                  !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
                  [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
  def b1ri
      : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
                  !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
                  [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
  // i16
  def b16rr
      : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
                  !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
                  [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
  def b16ri
      : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
                  !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
                  [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
  // i32
  def b32rr
      : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
                  !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
                  [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def b32ri
      : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                  !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
                  [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
  // i64
  def b64rr
      : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
                  !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
                  [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
  def b64ri
      : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
                  !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
                  [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
}

defm OR  : BITWISE<"or", or>;
defm AND : BITWISE<"and", and>;
defm XOR : BITWISE<"xor", xor>;

// Bitwise complement, one record per register width.
def NOT1
    : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
                "not.pred \t$dst, $src;",
                [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
def NOT16
    : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
                "not.b16 \t$dst, $src;",
                [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
def NOT32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
                "not.b32 \t$dst, $src;",
                [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
def NOT64
    : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
                "not.b64 \t$dst, $src;",
                [(set Int64Regs:$dst, (not Int64Regs:$src))]>;

// Template for left/right shifts.  Takes three operands,
//   [dest (reg), src (reg), shift (reg or imm)].
// dest and src may be int64, int32, or int16, but shift is always int32.
//
// This template also defines a 32-bit shift (imm, imm) instruction.
//
// OpcStr is expected to end just before the bit width (e.g. "shl.b"); the
// width digits are appended here.
multiclass SHIFT<string OpcStr, SDNode OpNode> {
  // 64-bit value
  def i64rr
      : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int32Regs:$b),
                  !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                  [(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int32Regs:$b))]>;
  def i64ri
      : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
                  !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                  [(set Int64Regs:$dst, (OpNode Int64Regs:$a, (i32 imm:$b)))]>;
  // 32-bit value
  def i32rr
      : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
                  !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                  [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def i32ri
      : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                  !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                  [(set Int32Regs:$dst, (OpNode Int32Regs:$a, (i32 imm:$b)))]>;
  def i32ii
      : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
                  !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                  [(set Int32Regs:$dst, (OpNode (i32 imm:$a), (i32 imm:$b)))]>;
  // 16-bit value
  def i16rr
      : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int32Regs:$b),
                  !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                  [(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int32Regs:$b))]>;
  def i16ri
      : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
                  !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                  [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 imm:$b)))]>;
}

defm SHL : SHIFT<"shl.b", shl>;
defm SRA : SHIFT<"shr.s", sra>;
defm SRL : SHIFT<"shr.u", srl>;

//
// Rotate: Use ptx shf instruction if available.
//

// 32 bit r2 = rotl r1, n
//    =>
//        r2 = shf.l r1, r1, n
def ROTL32imm_hw
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
                "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
                [(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>,
      Requires<[hasHWROT32]>;

def ROTL32reg_hw
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
                "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
                [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
      Requires<[hasHWROT32]>;

// 32 bit r2 = rotr r1, n
//    =>
//        r2 = shf.r r1, r1, n
def ROTR32imm_hw
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
                "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
                [(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>,
      Requires<[hasHWROT32]>;

def ROTR32reg_hw
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
                "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
                [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
      Requires<[hasHWROT32]>;

// 32-bit software rotate by immediate.  $amt2 should equal 32 - $amt1.
// The instruction carries no pattern of its own; the Pats below supply the
// two immediates so that the same record serves both rotl and rotr.
def ROT32imm_sw
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
                "{{\n\t"
                ".reg .b32 %lhs;\n\t"
                ".reg .b32 %rhs;\n\t"
                "shl.b32 \t%lhs, $src, $amt1;\n\t"
                "shr.b32 \t%rhs, $src, $amt2;\n\t"
                "add.u32 \t$dst, %lhs, %rhs;\n\t"
                "}}",
                []>;

// Transforms an immediate N into the i32 target constant 32 - N.
def SUB_FRM_32 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(32 - N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;

// rotl by n: left-shift amount is n, right-shift amount is 32 - n.
def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
      Requires<[noHWROT32]>;
// rotr by n: left-shift amount is 32 - n, right-shift amount is n.
def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;

// 32-bit software rotate left by register.
def ROTL32reg_sw
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
                "{{\n\t"
                ".reg .b32 %lhs;\n\t"
                ".reg .b32 %rhs;\n\t"
                ".reg .b32 %amt2;\n\t"
                "shl.b32 \t%lhs, $src, $amt;\n\t"
                "sub.s32 \t%amt2, 32, $amt;\n\t"
                "shr.b32 \t%rhs, $src, %amt2;\n\t"
                "add.u32 \t$dst, %lhs, %rhs;\n\t"
                "}}",
                [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
      Requires<[noHWROT32]>;

// 32-bit software rotate right by register.
def ROTR32reg_sw
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
                "{{\n\t"
                ".reg .b32 %lhs;\n\t"
                ".reg .b32 %rhs;\n\t"
                ".reg .b32 %amt2;\n\t"
                "shr.b32 \t%lhs, $src, $amt;\n\t"
                "sub.s32 \t%amt2, 32, $amt;\n\t"
                "shl.b32 \t%rhs, $src, %amt2;\n\t"
                "add.u32 \t$dst, %lhs, %rhs;\n\t"
                "}}",
                [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
      Requires<[noHWROT32]>;

// 64-bit software rotate by immediate.  $amt2 should equal 64 - $amt1.
// As with the 32-bit form, the record has no pattern; the Pats below feed it
// the two complementary shift amounts.
def ROT64imm_sw
    : NVPTXInst<(outs Int64Regs:$dst),
                (ins Int64Regs:$src, i32imm:$amt1, i32imm:$amt2),
                "{{\n\t"
                ".reg .b64 %lhs;\n\t"
                ".reg .b64 %rhs;\n\t"
                "shl.b64 \t%lhs, $src, $amt1;\n\t"
                "shr.b64 \t%rhs, $src, $amt2;\n\t"
                "add.u64 \t$dst, %lhs, %rhs;\n\t"
                "}}",
                []>;

// Transforms an immediate N into the i32 target constant 64 - N.
def SUB_FRM_64 : SDNodeXForm<imm, [{
    return CurDAG->getTargetConstant(64-N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;

// Unlike the 32-bit software rotates, these patterns carry no predicate.
def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;

// 64-bit software rotate left by register.
def ROTL64reg_sw
    : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt),
                "{{\n\t"
                ".reg .b64 %lhs;\n\t"
                ".reg .b64 %rhs;\n\t"
                ".reg .u32 %amt2;\n\t"
                "shl.b64 \t%lhs, $src, $amt;\n\t"
                "sub.u32 \t%amt2, 64, $amt;\n\t"
                "shr.b64 \t%rhs, $src, %amt2;\n\t"
                "add.u64 \t$dst, %lhs, %rhs;\n\t"
                "}}",
                [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;

// 64-bit software rotate right by register.
def ROTR64reg_sw
    : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt),
                "{{\n\t"
                ".reg .b64 %lhs;\n\t"
                ".reg .b64 %rhs;\n\t"
                ".reg .u32 %amt2;\n\t"
                "shr.b64 \t%lhs, $src, $amt;\n\t"
                "sub.u32 \t%amt2, 64, $amt;\n\t"
                "shl.b64 \t%rhs, $src, %amt2;\n\t"
                "add.u64 \t$dst, %lhs, %rhs;\n\t"
                "}}",
                [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;

//
// Funnel shift in clamp mode
//

// Create SDNodes so they can be used in the DAG code, e.g.
// NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
//
// Type profile: one result and three operands.  Operands 1 and 2 share the
// result's integer type; operand 3 is an integer as well.
def SDTIntShiftDOp
    : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                           SDTCisInt<0>, SDTCisInt<3>]>;
def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;

def FUNSHFLCLAMP
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",
                [(set Int32Regs:$dst,
                      (FUN_SHFL_CLAMP Int32Regs:$lo, Int32Regs:$hi,
                                      Int32Regs:$amt))]>;

def FUNSHFRCLAMP
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",
                [(set Int32Regs:$dst,
                      (FUN_SHFR_CLAMP Int32Regs:$lo, Int32Regs:$hi,
                                      Int32Regs:$amt))]>;

//
// BFE - bit-field extract
//

// Template for BFE instructions.  Takes four args,
//   [dest (reg), src (reg), start (reg or imm), end (reg or imm)].
// Start may be an imm only if end is also an imm.  FIXME: Is this a
// restriction in PTX?
// NOTE(review): the PTX ISA appears to describe bfe's last operand as the
// field length rather than an end position -- confirm and reword if so.
//
// dest and src may be int32 or int64, but start and end are always int32.
// Record suffixes name the kinds of the three source operands: the value is
// always a register of class RC, the two trailing operands are Int32Regs (r)
// or i32imm (i).  None of the records has a selection pattern.
multiclass BFE<string TyStr, RegisterClass RC> {
  def rrr
      : NVPTXInst<(outs RC:$d), (ins RC:$a, Int32Regs:$b, Int32Regs:$c),
                  !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
  def rri
      : NVPTXInst<(outs RC:$d), (ins RC:$a, Int32Regs:$b, i32imm:$c),
                  !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
  def rii
      : NVPTXInst<(outs RC:$d), (ins RC:$a, i32imm:$b, i32imm:$c),
                  !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
}

let hasSideEffects = 0 in {
  defm BFE_S32 : BFE<"s32", Int32Regs>;
  defm BFE_U32 : BFE<"u32", Int32Regs>;
  defm BFE_S64 : BFE<"s64", Int64Regs>;
  defm BFE_U64 : BFE<"u64", Int64Regs>;
}

//-----------------------------------
// Comparison instructions (setp, set)
//-----------------------------------

// FIXME: This doesn't cover versions of set and setp that combine with a
// boolean predicate, e.g. setp.eq.and.b16.

// setp: compare two operands and write an i1 predicate register.  The compare
// mode is an operand; the assembly string prints its base and ftz parts
// through the ${cmp:base} and ${cmp:ftz} modifiers.
let hasSideEffects = 0 in {
  multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
    def rr
        : NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp),
                    !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
                               "\t$dst, $a, $b;"),
                    []>;
    def ri
        : NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
                    !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
                               "\t$dst, $a, $b;"),
                    []>;
    def ir
        : NVPTXInst<(outs Int1Regs:$dst), (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
                    !strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
                               "\t$dst, $a, $b;"),
                    []>;
  }
}

defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>;
defm SETP_s16 : SETP<"s16", Int16Regs, i16imm>;
defm SETP_u16 : SETP<"u16", Int16Regs, i16imm>;
defm SETP_b32 : SETP<"b32", Int32Regs, i32imm>;
defm SETP_s32 : SETP<"s32", Int32Regs, i32imm>;
defm SETP_u32 : SETP<"u32", Int32Regs, i32imm>;
defm SETP_b64 : SETP<"b64", Int64Regs, i64imm>;
defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>;
defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>;
defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>;
defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>;

// FIXME: This doesn't appear to be correct.  The "set" mnemonic has the form
// "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination
// reg, either u32, s32, or f32.  Anyway these aren't used at the moment.

// set: same operand shapes as SETP, but the result goes to an Int32Regs
// register and the compare mode is printed with plain $cmp.
let hasSideEffects = 0 in {
  multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
    def rr
        : NVPTXInst<(outs Int32Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp),
                    !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
    def ri
        : NVPTXInst<(outs Int32Regs:$dst), (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
                    !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
    def ir
        : NVPTXInst<(outs Int32Regs:$dst), (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
                    !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
  }
}

defm SET_b16 : SET<"b16", Int16Regs, i16imm>;
defm SET_s16 : SET<"s16", Int16Regs, i16imm>;
defm SET_u16 : SET<"u16", Int16Regs, i16imm>;
defm SET_b32 : SET<"b32", Int32Regs, i32imm>;
defm SET_s32 : SET<"s32", Int32Regs, i32imm>;
defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
defm SET_f64 : SET<"f64", Float64Regs, f64imm>;

//-----------------------------------
// Selection instructions (selp)
//-----------------------------------

// FIXME: Missing slct

// selp instructions that don't have any pattern matches; we explicitly use
// them within this file.
let hasSideEffects = 0 in {
  // Pattern-less selp.  Suffix letters give the kinds of $a and $b
  // (r = register of class RC, i = immediate of class ImmCls); the predicate
  // $p is always an Int1Regs register.
  multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
    def rr
        : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, Int1Regs:$p),
                    !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
    def ri
        : NVPTXInst<(outs RC:$dst), (ins RC:$a, ImmCls:$b, Int1Regs:$p),
                    !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
    def ir
        : NVPTXInst<(outs RC:$dst), (ins ImmCls:$a, RC:$b, Int1Regs:$p),
                    !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
    def ii
        : NVPTXInst<(outs RC:$dst), (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
                    !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
  }

  // Same four records as SELP, each also selecting (select p, a, b).
  // ImmNode is the DAG leaf used to match immediate operands.
  multiclass SELP_PATTERN<string TypeStr, RegisterClass RC, Operand ImmCls,
                          SDNode ImmNode> {
    def rr
        : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, Int1Regs:$p),
                    !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
                    [(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>;
    def ri
        : NVPTXInst<(outs RC:$dst), (ins RC:$a, ImmCls:$b, Int1Regs:$p),
                    !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
                    [(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>;
    def ir
        : NVPTXInst<(outs RC:$dst), (ins ImmCls:$a, RC:$b, Int1Regs:$p),
                    !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
                    [(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>;
    def ii
        : NVPTXInst<(outs RC:$dst), (ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
                    !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
                    [(set RC:$dst,
                          (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>;
  }
}

// Don't pattern match on selp.{s,u}{16,32,64} -- selp.b{16,32,64} is just as
// good.
defm SELP_b16 : SELP_PATTERN<"b16", Int16Regs, i16imm, imm>;
defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>;
defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>;
defm SELP_b32 : SELP_PATTERN<"b32", Int32Regs, i32imm, imm>;
defm SELP_s32 : SELP<"s32", Int32Regs, i32imm>;
defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>;
defm SELP_b64 : SELP_PATTERN<"b64", Int64Regs, i64imm, imm>;
defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>;
defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>;
defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>;

//-----------------------------------
// Data Movement (Load / Store, Move)
//-----------------------------------

// Two-operand address patterns rooted at frameindex nodes; matching is
// delegated to the named C++ selector functions.
def ADDRri
    : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], [SDNPWantRoot]>;
def ADDRri64
    : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex], [SDNPWantRoot]>;

// Memory operands made of a register plus an immediate, in 32- and 64-bit
// flavours.
def MEMri : Operand<i32> {
  let PrintMethod = "printMemOperand";
  let MIOperandInfo = (ops Int32Regs, i32imm);
}
def MEMri64 : Operand<i64> {
  let PrintMethod = "printMemOperand";
  let MIOperandInfo = (ops Int64Regs, i64imm);
}

// Pointer-typed operands printed with the generic operand printer.
def imem : Operand<iPTR> {
  let PrintMethod = "printOperand";
}

def imemAny : Operand<iPTRAny> {
  let PrintMethod = "printOperand";
}

// i32 operand printed via printLdStCode.
def LdStCode : Operand<i32> {
  let PrintMethod = "printLdStCode";
}

// Wrapper node: one pointer-typed result whose type equals its single
// operand's type.
def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper    : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;

// Load a memory address into a u32 or u64 register.
def MOV_ADDR
    : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
                "mov.u32 \t$dst, $a;",
                [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
def MOV_ADDR64
    : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
                "mov.u64 \t$dst, $a;",
                [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;

// Get pointer to local stack.
let hasSideEffects = 0 in {
  def MOV_DEPOT_ADDR
      : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
                  "mov.u32 \t$d, __local_depot$num;", []>;
  def MOV_DEPOT_ADDR_64
      : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
                  "mov.u64 \t$d, __local_depot$num;", []>;
}


// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
let IsSimpleMove=1, hasSideEffects=0 in {
  def IMOV1rr
      : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
                  "mov.pred \t$dst, $sss;", []>;
  def IMOV16rr
      : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
                  "mov.u16 \t$dst, $sss;", []>;
  def IMOV32rr
      : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
                  "mov.u32 \t$dst, $sss;", []>;
  def IMOV64rr
      : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
                  "mov.u64 \t$dst, $sss;", []>;

  def FMOV32rr
      : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
                  "mov.f32 \t$dst, $src;", []>;
  def FMOV64rr
      : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
                  "mov.f64 \t$dst, $src;", []>;
}

// Immediate materialization: each record selects a bare constant of the
// matching type.
def IMOV1ri
    : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
                "mov.pred \t$dst, $src;",
                [(set Int1Regs:$dst, imm:$src)]>;
def IMOV16ri
    : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
                "mov.u16 \t$dst, $src;",
                [(set Int16Regs:$dst, imm:$src)]>;
def IMOV32ri
    : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
                "mov.u32 \t$dst, $src;",
                [(set Int32Regs:$dst, imm:$src)]>;
def IMOV64i
    : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
                "mov.u64 \t$dst, $src;",
                [(set Int64Regs:$dst, imm:$src)]>;

def FMOV32ri
    : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
                "mov.f32 \t$dst, $src;",
                [(set Float32Regs:$dst, fpimm:$src)]>;
def FMOV64ri
    : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
                "mov.f64 \t$dst, $src;",
                [(set Float64Regs:$dst, fpimm:$src)]>;

def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;

//---- Copy Frame Index ----
def LEA_ADDRi
    : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
                "add.u32 \t$dst, ${addr:add};",
                [(set Int32Regs:$dst, ADDRri:$addr)]>;
def LEA_ADDRi64
    : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
                "add.u64 \t$dst, ${addr:add};",
                [(set Int64Regs:$dst, ADDRri64:$addr)]>;

//-----------------------------------
// Comparison and Selection
//-----------------------------------

// Integer compare patterns for one comparison node.  For each of the three
// operand widths (i16/i32/i64) and each operand shape (rr/ri/ir) this emits
//   - an i1-result pattern that selects the given setp_* instruction, and
//   - an i32-result pattern that selects the given set_* instruction,
// always passing Mode as the compare-mode operand.
multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
                       Instruction setp_16rr,
                       Instruction setp_16ri,
                       Instruction setp_16ir,
                       Instruction setp_32rr,
                       Instruction setp_32ri,
                       Instruction setp_32ir,
                       Instruction setp_64rr,
                       Instruction setp_64ri,
                       Instruction setp_64ir,
                       Instruction set_16rr,
                       Instruction set_16ri,
                       Instruction set_16ir,
                       Instruction set_32rr,
                       Instruction set_32ri,
                       Instruction set_32ir,
                       Instruction set_64rr,
                       Instruction set_64ri,
                       Instruction set_64ir> {
  // i16 -> pred
  def : Pat<(i1 (OpNode Int16Regs:$a, Int16Regs:$b)),
            (setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)),
            (setp_16ri Int16Regs:$a, imm:$b, Mode)>;
  def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)),
            (setp_16ir imm:$a, Int16Regs:$b, Mode)>;
  // i32 -> pred
  def : Pat<(i1 (OpNode Int32Regs:$a, Int32Regs:$b)),
            (setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Int32Regs:$a, imm:$b)),
            (setp_32ri Int32Regs:$a, imm:$b, Mode)>;
  def : Pat<(i1 (OpNode imm:$a, Int32Regs:$b)),
            (setp_32ir imm:$a, Int32Regs:$b, Mode)>;
  // i64 -> pred
  def : Pat<(i1 (OpNode Int64Regs:$a, Int64Regs:$b)),
            (setp_64rr Int64Regs:$a, Int64Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Int64Regs:$a, imm:$b)),
            (setp_64ri Int64Regs:$a, imm:$b, Mode)>;
  def : Pat<(i1 (OpNode imm:$a, Int64Regs:$b)),
            (setp_64ir imm:$a, Int64Regs:$b, Mode)>;

  // i16 -> i32
  def : Pat<(i32 (OpNode Int16Regs:$a, Int16Regs:$b)),
            (set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)),
            (set_16ri Int16Regs:$a, imm:$b, Mode)>;
  def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)),
            (set_16ir imm:$a, Int16Regs:$b, Mode)>;
  // i32 -> i32
  def : Pat<(i32 (OpNode Int32Regs:$a, Int32Regs:$b)),
            (set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Int32Regs:$a, imm:$b)),
            (set_32ri Int32Regs:$a, imm:$b, Mode)>;
  def : Pat<(i32 (OpNode imm:$a, Int32Regs:$b)),
            (set_32ir imm:$a, Int32Regs:$b, Mode)>;
  // i64 -> i32
  def : Pat<(i32 (OpNode Int64Regs:$a, Int64Regs:$b)),
            (set_64rr Int64Regs:$a, Int64Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Int64Regs:$a, imm:$b)),
            (set_64ri Int64Regs:$a, imm:$b, Mode)>;
  def : Pat<(i32 (OpNode imm:$a, Int64Regs:$b)),
            (set_64ir imm:$a, Int64Regs:$b, Mode)>;
}

// ISET_FORMAT bound to the signed (.s16/.s32/.s64) setp/set instructions.
multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode>
  : ISET_FORMAT<OpNode, Mode,
                SETP_s16rr, SETP_s16ri, SETP_s16ir,
                SETP_s32rr, SETP_s32ri, SETP_s32ir,
                SETP_s64rr, SETP_s64ri, SETP_s64ir,
                SET_s16rr, SET_s16ri, SET_s16ir,
                SET_s32rr, SET_s32ri, SET_s32ir,
                SET_s64rr, SET_s64ri, SET_s64ir> {
  // TableGen doesn't like empty multiclasses.
  def : PatLeaf<(i32 0)>;
}

// ISET_FORMAT bound to the unsigned (.u16/.u32/.u64) setp/set instructions.
multiclass ISET_FORMAT_UNSIGNED<PatFrag OpNode, PatLeaf Mode>
  : ISET_FORMAT<OpNode, Mode,
                SETP_u16rr, SETP_u16ri, SETP_u16ir,
                SETP_u32rr, SETP_u32ri, SETP_u32ir,
                SETP_u64rr, SETP_u64ri, SETP_u64ir,
                SET_u16rr, SET_u16ri, SET_u16ir,
                SET_u32rr, SET_u32ri, SET_u32ir,
                SET_u64rr, SET_u64ri, SET_u64ir> {
  // TableGen doesn't like empty multiclasses.
  def : PatLeaf<(i32 0)>;
}

defm : ISET_FORMAT_SIGNED<setgt, CmpGT>;
defm : ISET_FORMAT_SIGNED<setlt, CmpLT>;
defm : ISET_FORMAT_SIGNED<setge, CmpGE>;
defm : ISET_FORMAT_SIGNED<setle, CmpLE>;
defm : ISET_FORMAT_SIGNED<seteq, CmpEQ>;
defm : ISET_FORMAT_SIGNED<setne, CmpNE>;
defm : ISET_FORMAT_UNSIGNED<setugt, CmpGT>;
defm : ISET_FORMAT_UNSIGNED<setult, CmpLT>;
defm : ISET_FORMAT_UNSIGNED<setuge, CmpGE>;
defm : ISET_FORMAT_UNSIGNED<setule, CmpLE>;
defm : ISET_FORMAT_UNSIGNED<setueq, CmpEQ>;
defm : ISET_FORMAT_UNSIGNED<setune, CmpNE>;

// i1 compares
// Inequality of two predicates is their xor; equality is the negated xor.
def : Pat<(setne Int1Regs:$a, Int1Regs:$b),
          (XORb1rr Int1Regs:$a, Int1Regs:$b)>;
def : Pat<(setune Int1Regs:$a, Int1Regs:$b),
          (XORb1rr Int1Regs:$a, Int1Regs:$b)>;

def : Pat<(seteq Int1Regs:$a, Int1Regs:$b),
          (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
def : Pat<(setueq Int1Regs:$a, Int1Regs:$b),
          (NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;

// i1 compare -> i32
// selp picks its first value when the predicate (a xor b) is true, so
// (-1, 0) encodes "not equal" and (0, -1) encodes "equal".  The second
// pattern previously matched setne as well, which duplicated the first
// pattern with the opposite result and left seteq without an i32 lowering.
def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),
          (SELP_u32ii -1, 0, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
def : Pat<(i32 (seteq Int1Regs:$a, Int1Regs:$b)),
          (SELP_u32ii 0, -1, (XORb1rr Int1Regs:$a, Int1Regs:$b))>;



// Floating-point compare patterns for one comparison node.  f32 compares get
// two patterns per operand shape: one using ModeFTZ, gated on doF32FTZ, and an
// ungated one using Mode.  f64 compares always use Mode.  Both i1 (setp) and
// i32 (set) result forms are covered.
multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
  // f32 -> pred
  def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SETP_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)),
            (SETP_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode Float32Regs:$a, fpimm:$b)),
            (SETP_f32ri Float32Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)),
            (SETP_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i1 (OpNode fpimm:$a, Float32Regs:$b)),
            (SETP_f32ir fpimm:$a, Float32Regs:$b, Mode)>;

  // f64 -> pred
  def : Pat<(i1 (OpNode Float64Regs:$a, Float64Regs:$b)),
            (SETP_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
  def : Pat<(i1 (OpNode Float64Regs:$a, fpimm:$b)),
            (SETP_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i1 (OpNode fpimm:$a, Float64Regs:$b)),
            (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>;

  // f32 -> i32
  def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
            (SET_f32rr Float32Regs:$a, Float32Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)),
            (SET_f32ri Float32Regs:$a, fpimm:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode Float32Regs:$a, fpimm:$b)),
            (SET_f32ri Float32Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)),
            (SET_f32ir fpimm:$a, Float32Regs:$b, ModeFTZ)>,
        Requires<[doF32FTZ]>;
  def : Pat<(i32 (OpNode fpimm:$a, Float32Regs:$b)),
            (SET_f32ir fpimm:$a, Float32Regs:$b, Mode)>;

  // f64 -> i32
  def : Pat<(i32 (OpNode Float64Regs:$a, Float64Regs:$b)),
            (SET_f64rr Float64Regs:$a, Float64Regs:$b, Mode)>;
  def : Pat<(i32 (OpNode Float64Regs:$a, fpimm:$b)),
            (SET_f64ri Float64Regs:$a, fpimm:$b, Mode)>;
  def : Pat<(i32 (OpNode fpimm:$a, Float64Regs:$b)),
            (SET_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
}

// Ordered compares.
defm FSetOGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
defm FSetOLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>;
defm FSetOGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>;
defm FSetOLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>;
defm FSetOEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>;
defm FSetONE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>;

// Unordered compares.
defm FSetUGT : FSET_FORMAT<setugt, CmpGTU, CmpGTU_FTZ>;
defm FSetULT : FSET_FORMAT<setult, CmpLTU, CmpLTU_FTZ>;
defm FSetUGE : FSET_FORMAT<setuge, CmpGEU, CmpGEU_FTZ>;
defm FSetULE : FSET_FORMAT<setule, CmpLEU, CmpLEU_FTZ>;
defm FSetUEQ : FSET_FORMAT<setueq, CmpEQU, CmpEQU_FTZ>;
defm FSetUNE : FSET_FORMAT<setune, CmpNEU, CmpNEU_FTZ>;

// Compares without an ordered/unordered qualifier use the same modes as the
// ordered forms.
defm FSetGT : FSET_FORMAT<setgt, CmpGT, CmpGT_FTZ>;
defm FSetLT : FSET_FORMAT<setlt, CmpLT, CmpLT_FTZ>;
defm FSetGE : FSET_FORMAT<setge, CmpGE, CmpGE_FTZ>;
defm FSetLE : FSET_FORMAT<setle, CmpLE, CmpLE_FTZ>;
defm FSetEQ : FSET_FORMAT<seteq, CmpEQ, CmpEQ_FTZ>;
defm FSetNE : FSET_FORMAT<setne, CmpNE, CmpNE_FTZ>;

defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;

// FIXME: What is this doing here?  Can it be deleted?
// Disabled node definition (subject of the FIXME directly above):
// def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
//                       [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;

//===----------------------------------------------------------------------===//
// Type profiles for the NVPTXISD parameter / call nodes.
// SDTypeProfile<NumResults, NumOperands, [constraints]>.
//===----------------------------------------------------------------------===//

def SDTDeclareParamProfile
    : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
def SDTDeclareScalarParamProfile
    : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
def SDTLoadParamProfile
    : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDTLoadParamV2Profile
    : SDTypeProfile<2, 2,
                    [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
def SDTLoadParamV4Profile
    : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>;
def SDTPrintCallProfile
    : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDTPrintCallUniProfile
    : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDTStoreParamProfile
    : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV2Profile
    : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParamV4Profile
    : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParam32Profile
    : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTCallArgProfile
    : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTCallArgMarkProfile
    : SDTypeProfile<0, 0, []>;
def SDTCallVoidProfile
    : SDTypeProfile<0, 1, []>;
def SDTCallValProfile
    : SDTypeProfile<1, 0, []>;
def SDTMoveParamProfile
    : SDTypeProfile<1, 1, []>;
def SDTStoreRetvalProfile
    : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTStoreRetvalV2Profile
    : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
def SDTStoreRetvalV4Profile
    : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
def SDTPseudoUseParamProfile
    : SDTypeProfile<0, 1, []>;

//===----------------------------------------------------------------------===//
// NVPTXISD nodes.  Most are chained and glued on both sides and carry
// SDNPSideEffect; the LoadParam* nodes carry SDNPMayLoad instead, the
// StoreRetval* / RETURN nodes are chained but not glued, and MoveParam has no
// node properties at all.
//===----------------------------------------------------------------------===//

def DeclareParam
    : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareScalarParam
    : SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRetParam
    : SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRet
    : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def LoadParam
    : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
             [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def LoadParamV2
    : SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile,
             [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def LoadParamV4
    : SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
             [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def PrintCall
    : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintConvergentCall
    : SDNode<"NVPTXISD::PrintConvergentCall", SDTPrintCallProfile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintCallUni
    : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintConvergentCallUni
    : SDNode<"NVPTXISD::PrintConvergentCallUni", SDTPrintCallUniProfile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParam
    : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamV2
    : SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamV4
    : SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamU32
    : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamS32
    : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArgBegin
    : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArg
    : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def LastCallArg
    : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArgEnd
    : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVoid
    : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def Prototype
    : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVal
    : SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def MoveParam
    : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
def StoreRetval
    : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
             [SDNPHasChain, SDNPSideEffect]>;
def StoreRetvalV2
    : SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile,
             [SDNPHasChain, SDNPSideEffect]>;
def StoreRetvalV4
    : SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile,
             [SDNPHasChain, SDNPSideEffect]>;
def PseudoUseParam
    : SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile,
             [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def RETURNNode
    : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
             [SDNPHasChain, SDNPSideEffect]>;

//===----------------------------------------------------------------------===//
// Instruction classes for reading a callee's return slot.  Each prints
// "ld.param<opstr>" (optionally .v2 / .v4) from [retval0+$b].  They carry no
// selection pattern.
//===----------------------------------------------------------------------===//

let mayLoad = 1 in {
  // Scalar form: one destination register.
  class LoadParamMemInst<NVPTXRegClass regclass, string opstr>
      : NVPTXInst<(outs regclass:$dst),
                  (ins i32imm:$b),
                  !strconcat("ld.param", opstr, "\t$dst, [retval0+$b];"),
                  []>;

  // Two-element vector form.
  class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr>
      : NVPTXInst<(outs regclass:$dst, regclass:$dst2),
                  (ins i32imm:$b),
                  !strconcat("ld.param.v2", opstr,
                             "\t{{$dst, $dst2}}, [retval0+$b];"),
                  []>;

  // Four-element vector form.
  class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr>
      : NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
                        regclass:$dst4),
                  (ins i32imm:$b),
                  !strconcat("ld.param.v4", opstr,
                             "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
                  []>;
}

// Register flavour of LoadParam: prints "mov<opstr> $dst, retval$b" and is
// selected for (LoadParam 0, imm).
class LoadParamRegInst<NVPTXRegClass regclass, string opstr>
    : NVPTXInst<(outs regclass:$dst),
                (ins i32imm:$b),
                !strconcat("mov", opstr, "\t$dst, retval$b;"),
                [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;

//===----------------------------------------------------------------------===//
// Instruction classes for writing outgoing parameters ([param$a+$b]) and the
// function's own return slot ([func_retval0+$a]).  None has a selection
// pattern.
//===----------------------------------------------------------------------===//

let mayStore = 1 in {
  class StoreParamInst<NVPTXRegClass regclass, string opstr>
      : NVPTXInst<(outs),
                  (ins regclass:$val, i32imm:$a, i32imm:$b),
                  !strconcat("st.param", opstr, "\t[param$a+$b], $val;"),
                  []>;

  class StoreParamV2Inst<NVPTXRegClass regclass, string opstr>
      : NVPTXInst<(outs),
                  (ins regclass:$val, regclass:$val2, i32imm:$a, i32imm:$b),
                  !strconcat("st.param.v2", opstr,
                             "\t[param$a+$b], {{$val, $val2}};"),
                  []>;

  class StoreParamV4Inst<NVPTXRegClass regclass, string opstr>
      : NVPTXInst<(outs),
                  (ins regclass:$val, regclass:$val2, regclass:$val3,
                       regclass:$val4, i32imm:$a, i32imm:$b),
                  !strconcat("st.param.v4", opstr,
                             "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
                  []>;

  class StoreRetvalInst<NVPTXRegClass regclass, string opstr>
      : NVPTXInst<(outs),
                  (ins regclass:$val, i32imm:$a),
                  !strconcat("st.param", opstr, "\t[func_retval0+$a], $val;"),
                  []>;

  class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr>
      : NVPTXInst<(outs),
                  (ins regclass:$val, regclass:$val2, i32imm:$a),
                  !strconcat("st.param.v2", opstr,
                             "\t[func_retval0+$a], {{$val, $val2}};"),
                  []>;

  class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr>
      : NVPTXInst<(outs),
                  (ins regclass:$val, regclass:$val2, regclass:$val3,
                       regclass:$val4, i32imm:$a),
                  !strconcat("st.param.v4", opstr,
                             "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
                  []>;
}

//===----------------------------------------------------------------------===//
// Call-prefix printers.  OpNode's single i32 operand picks how many retvalN
// names (0 through 8) are printed after the call opcode.
//===----------------------------------------------------------------------===//

let isCall = 1 in {
  multiclass CALL<string OpcStr, SDNode OpNode> {
    def PrintCallNoRetInst
        : NVPTXInst<(outs), (ins),
                    !strconcat(OpcStr, " "),
                    [(OpNode (i32 0))]>;
    def PrintCallRetInst1
        : NVPTXInst<(outs), (ins),
                    !strconcat(OpcStr, " (retval0), "),
                    [(OpNode (i32 1))]>;
    def PrintCallRetInst2
        : NVPTXInst<(outs), (ins),
                    !strconcat(OpcStr, " (retval0, retval1), "),
                    [(OpNode (i32 2))]>;
    def PrintCallRetInst3
        : NVPTXInst<(outs), (ins),
                    !strconcat(OpcStr, " (retval0, retval1, retval2), "),
                    [(OpNode (i32 3))]>;
    def PrintCallRetInst4
        : NVPTXInst<(outs), (ins),
                    !strconcat(OpcStr,
                               " (retval0, retval1, retval2, retval3), "),
                    [(OpNode (i32 4))]>;
    def PrintCallRetInst5
        : NVPTXInst<(outs), (ins),
                    !strconcat(OpcStr,
                               " (retval0, retval1, retval2, retval3, retval4), "),
                    [(OpNode (i32 5))]>;
    def PrintCallRetInst6
        : NVPTXInst<(outs), (ins),
                    !strconcat(OpcStr,
                               " (retval0, retval1, retval2, retval3, retval4, "
                               "retval5), "),
                    [(OpNode (i32 6))]>;
    def PrintCallRetInst7
        : NVPTXInst<(outs), (ins),
                    !strconcat(OpcStr,
                               " (retval0, retval1, retval2, retval3, retval4, "
                               "retval5, retval6), "),
                    [(OpNode (i32 7))]>;
    def PrintCallRetInst8
        : NVPTXInst<(outs), (ins),
                    !strconcat(OpcStr,
                               " (retval0, retval1, retval2, retval3, retval4, "
                               "retval5, retval6, retval7), "),
                    [(OpNode (i32 8))]>;
  }
}

defm Call    : CALL<"call", PrintCall>;
defm CallUni : CALL<"call.uni", PrintCallUni>;

// Convergent call instructions.  These are identical to regular calls, except
// they have the isConvergent bit set.
let isConvergent = 1 in {
  defm ConvergentCall    : CALL<"call", PrintConvergentCall>;
  defm ConvergentCallUni : CALL<"call.uni", PrintConvergentCallUni>;
}

// ld.param instructions for each register class / element count.  The i8
// variants reuse Int16Regs and differ only in the ".b8" suffix.
def LoadParamMemI64   : LoadParamMemInst<Int64Regs, ".b64">;
def LoadParamMemI32   : LoadParamMemInst<Int32Regs, ".b32">;
def LoadParamMemI16   : LoadParamMemInst<Int16Regs, ".b16">;
def LoadParamMemI8    : LoadParamMemInst<Int16Regs, ".b8">;
def LoadParamMemV2I64 : LoadParamV2MemInst<Int64Regs, ".b64">;
def LoadParamMemV2I32 : LoadParamV2MemInst<Int32Regs, ".b32">;
def LoadParamMemV2I16 : LoadParamV2MemInst<Int16Regs, ".b16">;
def LoadParamMemV2I8  : LoadParamV2MemInst<Int16Regs, ".b8">;
def LoadParamMemV4I32 : LoadParamV4MemInst<Int32Regs, ".b32">;
def LoadParamMemV4I16 : LoadParamV4MemInst<Int16Regs, ".b16">;
def LoadParamMemV4I8  : LoadParamV4MemInst<Int16Regs, ".b8">;
def LoadParamMemF32   : LoadParamMemInst<Float32Regs, ".f32">;
def LoadParamMemF64   : LoadParamMemInst<Float64Regs, ".f64">;
def LoadParamMemV2F32 : LoadParamV2MemInst<Float32Regs, ".f32">;
def LoadParamMemV2F64 : LoadParamV2MemInst<Float64Regs, ".f64">;
def LoadParamMemV4F32 : LoadParamV4MemInst<Float32Regs, ".f32">;

// st.param instructions targeting an outgoing parameter slot.
def StoreParamI64   : StoreParamInst<Int64Regs, ".b64">;
def StoreParamI32   : StoreParamInst<Int32Regs, ".b32">;

def StoreParamI16   : StoreParamInst<Int16Regs, ".b16">;
def StoreParamI8    : StoreParamInst<Int16Regs, ".b8">;
def StoreParamV2I64 : StoreParamV2Inst<Int64Regs, ".b64">;
def StoreParamV2I32 : StoreParamV2Inst<Int32Regs, ".b32">;
def StoreParamV2I16 : StoreParamV2Inst<Int16Regs, ".b16">;
def StoreParamV2I8  : StoreParamV2Inst<Int16Regs, ".b8">;

def StoreParamV4I32 : StoreParamV4Inst<Int32Regs, ".b32">;
def StoreParamV4I16 : StoreParamV4Inst<Int16Regs, ".b16">;
def StoreParamV4I8  : StoreParamV4Inst<Int16Regs, ".b8">;

def StoreParamF32   : StoreParamInst<Float32Regs, ".f32">;
def StoreParamF64   : StoreParamInst<Float64Regs, ".f64">;
def StoreParamV2F32 : StoreParamV2Inst<Float32Regs, ".f32">;
def StoreParamV2F64 : StoreParamV2Inst<Float64Regs, ".f64">;
def StoreParamV4F32 : StoreParamV4Inst<Float32Regs, ".f32">;

// st.param instructions targeting func_retval0.
def StoreRetvalI64   : StoreRetvalInst<Int64Regs, ".b64">;
def StoreRetvalI32   : StoreRetvalInst<Int32Regs, ".b32">;
def StoreRetvalI16   : StoreRetvalInst<Int16Regs, ".b16">;
def StoreRetvalI8    : StoreRetvalInst<Int16Regs, ".b8">;
def StoreRetvalV2I64 : StoreRetvalV2Inst<Int64Regs, ".b64">;
def StoreRetvalV2I32 : StoreRetvalV2Inst<Int32Regs, ".b32">;
def StoreRetvalV2I16 : StoreRetvalV2Inst<Int16Regs, ".b16">;
def StoreRetvalV2I8  : StoreRetvalV2Inst<Int16Regs, ".b8">;
def StoreRetvalV4I32 : StoreRetvalV4Inst<Int32Regs, ".b32">;
def StoreRetvalV4I16 : StoreRetvalV4Inst<Int16Regs, ".b16">;
def StoreRetvalV4I8  : StoreRetvalV4Inst<Int16Regs, ".b8">;

def StoreRetvalF64   : StoreRetvalInst<Float64Regs, ".f64">;
def StoreRetvalF32   : StoreRetvalInst<Float32Regs, ".f32">;
def StoreRetvalV2F64 : StoreRetvalV2Inst<Float64Regs, ".f64">;
def StoreRetvalV2F32 : StoreRetvalV2Inst<Float32Regs, ".f32">;
def StoreRetvalV4F32 : StoreRetvalV4Inst<Float32Regs, ".f32">;

// Punctuation of the printed call: "(" opens the argument list; CallArgEnd
// prints ");" for operand 1 and ")" for operand 0.  RETURN prints "ret;".
def CallArgBeginInst
    : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1
    : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
def CallArgEndInst0
    : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
def RETURNInst
    : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;

// A register argument followed by ", " (more arguments follow).
class CallArgInst<NVPTXRegClass regclass>
    : NVPTXInst<(outs), (ins regclass:$a),
                "$a, ",
                [(CallArg (i32 0), regclass:$a)]>;

// The final register argument: printed without a trailing separator.
class LastCallArgInst<NVPTXRegClass regclass>
    : NVPTXInst<(outs), (ins regclass:$a),
                "$a",
                [(LastCallArg (i32 0), regclass:$a)]>;

def CallArgI64 : CallArgInst<Int64Regs>;
def CallArgI32 : CallArgInst<Int32Regs>;
def CallArgI16 : CallArgInst<Int16Regs>;
def CallArgF64 : CallArgInst<Float64Regs>;
def CallArgF32 : CallArgInst<Float32Regs>;

def LastCallArgI64 : LastCallArgInst<Int64Regs>;
def LastCallArgI32 : LastCallArgInst<Int32Regs>;
def LastCallArgI16 : LastCallArgInst<Int16Regs>;
def LastCallArgF64 : LastCallArgInst<Float64Regs>;
def LastCallArgF32 : LastCallArgInst<Float32Regs>;

// Immediate arguments (first CallArg operand 0): printed as the bare value.
def CallArgI32imm
    : NVPTXInst<(outs), (ins i32imm:$a),
                "$a, ",
                [(CallArg (i32 0), (i32 imm:$a))]>;
def LastCallArgI32imm
    : NVPTXInst<(outs), (ins i32imm:$a),
                "$a",
                [(LastCallArg (i32 0), (i32 imm:$a))]>;

// Parameter-slot arguments (first CallArg operand 1): printed as "param<N>".
def CallArgParam
    : NVPTXInst<(outs), (ins i32imm:$a),
                "param$a, ",
                [(CallArg (i32 1), (i32 imm:$a))]>;
def LastCallArgParam
    : NVPTXInst<(outs), (ins i32imm:$a),
                "param$a",
                [(LastCallArg (i32 1), (i32 imm:$a))]>;

// Call target: a wrapped global address, or a 32-/64-bit register.
def CallVoidInst
    : NVPTXInst<(outs), (ins imem:$addr),
                "$addr, ",
                [(CallVoid (Wrapper tglobaladdr:$addr))]>;
def CallVoidInstReg
    : NVPTXInst<(outs), (ins Int32Regs:$addr),
                "$addr, ",
                [(CallVoid Int32Regs:$addr)]>;
def CallVoidInstReg64
    : NVPTXInst<(outs), (ins Int64Regs:$addr),
                "$addr, ",
                [(CallVoid Int64Regs:$addr)]>;
def PrototypeInst
    : NVPTXInst<(outs), (ins i32imm:$val),
                ", prototype_$val;",
                [(Prototype (i32 imm:$val))]>;

// Declarations of return-value storage.  DeclareRet's first operand selects
// the printed form: 1 -> ".param", 2 -> ".reg".
def DeclareRetMemInst
    : NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size, i32imm:$num),
                ".param .align $align .b8 retval$num[$size];",
                [(DeclareRetParam (i32 imm:$align), (i32 imm:$size),
                                  (i32 imm:$num))]>;
def DeclareRetScalarInst
    : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
                ".param .b$size retval$num;",
                [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
def DeclareRetRegInst
    : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
                ".reg .b$size retval$num;",
                [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;

// Declarations of outgoing-parameter storage.  DeclareScalarParam's last
// operand selects the printed form: 0 -> ".param", 1 -> ".reg".
def DeclareParamInst
    : NVPTXInst<(outs), (ins i32imm:$align, i32imm:$a, i32imm:$size),
                ".param .align $align .b8 param$a[$size];",
                [(DeclareParam (i32 imm:$align), (i32 imm:$a),
                               (i32 imm:$size))]>;
def DeclareScalarParamInst
    : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
                ".param .b$size param$a;",
                [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
def DeclareScalarRegInst
    : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
                ".reg .b$size param$a;",
                [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;

// MoveParam is selected to a same-class register "mov<asmstr>".
class MoveParamInst<NVPTXRegClass regclass, string asmstr>
    : NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
                !strconcat("mov", asmstr, "\t$dst, $src;"),
                [(set regclass:$dst, (MoveParam regclass:$src))]>;

def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
// The 16-bit variant is spelled out by hand because it prints a cvt rather
// than a mov.
def MoveParamI16
    : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
                "cvt.u16.u32\t$dst, $src;",
                [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;

// PseudoUseParam prints only an assembly comment naming its operand.
class PseudoUseParamInst<NVPTXRegClass regclass>
    : NVPTXInst<(outs), (ins regclass:$src),
                "// Pseudo use of $src",
                [(PseudoUseParam regclass:$src)]>;

def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;


//
// Load / Store Handling
//

// Scalar loads.  All variants share the four LdStCode operands and
// $fromWidth, and differ only in how the address is supplied:
//   _avar     imem symbol
//   _areg     Int32Regs register
//   _areg_64  Int64Regs register
//   _ari      Int32Regs register + i32imm offset
//   _ari_64   Int64Regs register + i32imm offset
//   _asi      imem symbol + i32imm offset
// No variant has a selection pattern.
multiclass LD<NVPTXRegClass regclass> {
  def _avar
      : NVPTXInst<(outs regclass:$dst),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t$dst, [$addr];",
                  []>;
  def _areg
      : NVPTXInst<(outs regclass:$dst),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t$dst, [$addr];",
                  []>;
  def _areg_64
      : NVPTXInst<(outs regclass:$dst),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t$dst, [$addr];",
                  []>;
  def _ari
      : NVPTXInst<(outs regclass:$dst),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
                       i32imm:$offset),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t$dst, [$addr+$offset];",
                  []>;
  def _ari_64
      : NVPTXInst<(outs regclass:$dst),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
                       i32imm:$offset),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t$dst, [$addr+$offset];",
                  []>;
  def _asi
      : NVPTXInst<(outs regclass:$dst),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
                       i32imm:$offset),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t$dst, [$addr+$offset];",
                  []>;
}

let mayLoad = 1, hasSideEffects = 0 in {
  defm LD_i8  : LD<Int16Regs>;
  defm LD_i16 : LD<Int16Regs>;
  defm LD_i32 : LD<Int32Regs>;
  defm LD_i64 : LD<Int64Regs>;
  defm LD_f32 : LD<Float32Regs>;
  defm LD_f64 : LD<Float64Regs>;
}

// Scalar stores.  Same six addressing variants as LD; the stored value $src
// is the first input operand and the width operand is named $toWidth.
multiclass ST<NVPTXRegClass regclass> {
  def _avar
      : NVPTXInst<(outs),
                  (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                       LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                       imem:$addr),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
                  " \t[$addr], $src;",
                  []>;
  def _areg
      : NVPTXInst<(outs),
                  (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                       LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                       Int32Regs:$addr),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
                  " \t[$addr], $src;",
                  []>;
  def _areg_64
      : NVPTXInst<(outs),
                  (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                       LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                       Int64Regs:$addr),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
                  " \t[$addr], $src;",
                  []>;
  def _ari
      : NVPTXInst<(outs),
                  (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                       LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                       Int32Regs:$addr, i32imm:$offset),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
                  " \t[$addr+$offset], $src;",
                  []>;
  def _ari_64
      : NVPTXInst<(outs),
                  (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                       LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                       Int64Regs:$addr, i32imm:$offset),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
                  " \t[$addr+$offset], $src;",
                  []>;
  def _asi
      : NVPTXInst<(outs),
                  (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                       LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                       imem:$addr, i32imm:$offset),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"
                  " \t[$addr+$offset], $src;",
                  []>;
}

let mayStore = 1, hasSideEffects = 0 in {
  defm ST_i8  : ST<Int16Regs>;
  defm ST_i16 : ST<Int16Regs>;
  defm ST_i32 : ST<Int32Regs>;
  defm ST_i64 : ST<Int64Regs>;
  defm ST_f32 : ST<Float32Regs>;
  defm ST_f64 : ST<Float64Regs>;
}

// The following is used only in and after vector elementizations.
// Vector elementization happens at the machine instruction level, so the
// following instructions never appear in the DAG.

// Vector loads: _v2_* write two destination registers, _v4_* write four.
// Each comes in the six addressing variants (_avar, _areg, _areg_64, _ari,
// _ari_64, _asi) that differ only in the address operands.
multiclass LD_VEC<NVPTXRegClass regclass> {
  // ---- two-element forms ----
  def _v2_avar
      : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t{{$dst1, $dst2}}, [$addr];",
                  []>;
  def _v2_areg
      : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t{{$dst1, $dst2}}, [$addr];",
                  []>;
  def _v2_areg_64
      : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t{{$dst1, $dst2}}, [$addr];",
                  []>;
  def _v2_ari
      : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
                       i32imm:$offset),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t{{$dst1, $dst2}}, [$addr+$offset];",
                  []>;
  def _v2_ari_64
      : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
                       i32imm:$offset),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t{{$dst1, $dst2}}, [$addr+$offset];",
                  []>;
  def _v2_asi
      : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
                       i32imm:$offset),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t{{$dst1, $dst2}}, [$addr+$offset];",
                  []>;

  // ---- four-element forms ----
  def _v4_avar
      : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                        regclass:$dst4),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];",
                  []>;
  def _v4_areg
      : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                        regclass:$dst4),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];",
                  []>;
  def _v4_areg_64
      : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                        regclass:$dst4),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];",
                  []>;
  def _v4_ari
      : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                        regclass:$dst4),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
                       i32imm:$offset),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];",
                  []>;
  def _v4_ari_64
      : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                        regclass:$dst4),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
                       i32imm:$offset),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];",
                  []>;
  def _v4_asi
      : NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                        regclass:$dst4),
                  (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                       LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
                       i32imm:$offset),
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];",
                  []>;
}
let mayLoad = 1, hasSideEffects = 0 in {
  defm LDV_i8  : LD_VEC<Int16Regs>;
  defm LDV_i16 : LD_VEC<Int16Regs>;
  defm LDV_i32 : LD_VEC<Int32Regs>;
  defm LDV_i64 : LD_VEC<Int64Regs>;
  defm LDV_f32 : LD_VEC<Float32Regs>;
  defm LDV_f64 : LD_VEC<Float64Regs>;
}

// Vector stores: the source registers ($src1..$src2 or $src1..$src4) lead the
// input list, followed by the LdStCode operands, the width operand (named
// $fromWidth here as well) and the address operands.
multiclass ST_VEC<NVPTXRegClass regclass> {
  // ---- two-element forms ----
  def _v2_avar
      : NVPTXInst<(outs),
                  (ins regclass:$src1, regclass:$src2, LdStCode:$isVol,
                       LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
                       i32imm:$fromWidth, imem:$addr),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t[$addr], {{$src1, $src2}};",
                  []>;
  def _v2_areg
      : NVPTXInst<(outs),
                  (ins regclass:$src1, regclass:$src2, LdStCode:$isVol,
                       LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
                       i32imm:$fromWidth, Int32Regs:$addr),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t[$addr], {{$src1, $src2}};",
                  []>;
  def _v2_areg_64
      : NVPTXInst<(outs),
                  (ins regclass:$src1, regclass:$src2, LdStCode:$isVol,
                       LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
                       i32imm:$fromWidth, Int64Regs:$addr),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t[$addr], {{$src1, $src2}};",
                  []>;
  def _v2_ari
      : NVPTXInst<(outs),
                  (ins regclass:$src1, regclass:$src2, LdStCode:$isVol,
                       LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
                       i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t[$addr+$offset], {{$src1, $src2}};",
                  []>;
  def _v2_ari_64
      : NVPTXInst<(outs),
                  (ins regclass:$src1, regclass:$src2, LdStCode:$isVol,
                       LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
                       i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t[$addr+$offset], {{$src1, $src2}};",
                  []>;
  def _v2_asi
      : NVPTXInst<(outs),
                  (ins regclass:$src1, regclass:$src2, LdStCode:$isVol,
                       LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
                       i32imm:$fromWidth, imem:$addr, i32imm:$offset),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t[$addr+$offset], {{$src1, $src2}};",
                  []>;

  // ---- four-element forms ----
  def _v4_avar
      : NVPTXInst<(outs),
                  (ins regclass:$src1, regclass:$src2, regclass:$src3,
                       regclass:$src4, LdStCode:$isVol, LdStCode:$addsp,
                       LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
                       imem:$addr),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t[$addr], {{$src1, $src2, $src3, $src4}};",
                  []>;
  def _v4_areg
      : NVPTXInst<(outs),
                  (ins regclass:$src1, regclass:$src2, regclass:$src3,
                       regclass:$src4, LdStCode:$isVol, LdStCode:$addsp,
                       LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
                       Int32Regs:$addr),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t[$addr], {{$src1, $src2, $src3, $src4}};",
                  []>;
  def _v4_areg_64
      : NVPTXInst<(outs),
                  (ins regclass:$src1, regclass:$src2, regclass:$src3,
                       regclass:$src4, LdStCode:$isVol, LdStCode:$addsp,
                       LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
                       Int64Regs:$addr),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t[$addr], {{$src1, $src2, $src3, $src4}};",
                  []>;
  def _v4_ari
      : NVPTXInst<(outs),
                  (ins regclass:$src1, regclass:$src2, regclass:$src3,
                       regclass:$src4, LdStCode:$isVol, LdStCode:$addsp,
                       LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
                       Int32Regs:$addr, i32imm:$offset),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};",
                  []>;
  def _v4_ari_64
      : NVPTXInst<(outs),
                  (ins regclass:$src1, regclass:$src2, regclass:$src3,
                       regclass:$src4, LdStCode:$isVol, LdStCode:$addsp,
                       LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
                       Int64Regs:$addr, i32imm:$offset),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};",
                  []>;
  def _v4_asi
      : NVPTXInst<(outs),
                  (ins regclass:$src1, regclass:$src2, regclass:$src3,
                       regclass:$src4, LdStCode:$isVol, LdStCode:$addsp,
                       LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth,
                       imem:$addr, i32imm:$offset),
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "
                  "\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};",
                  []>;
}

let mayStore = 1, hasSideEffects = 0 in {
  defm STV_i8  : ST_VEC<Int16Regs>;
  defm STV_i16 : ST_VEC<Int16Regs>;
  defm STV_i32 : ST_VEC<Int32Regs>;
  defm STV_i64 : ST_VEC<Int64Regs>;
  defm STV_f32 : ST_VEC<Float32Regs>;
  defm STV_f64 : ST_VEC<Float64Regs>;
}


//---- Conversion ----

// Same-width bitcast between an integer and a floating-point register class,
// printed as "mov.b<SzStr>" and selected for the generic bitconvert node.
class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
                   NVPTXRegClass regclassOut>
    : NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
                !strconcat("mov.b", SzStr, " \t $d, $a;"),
                [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;

def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;

// NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where
// we cannot specify floating-point literals in isel patterns.
// Therefore, we use an integer selp to materialize the predicate's integer
// value (1 or 0 when unsigned, -1 or 0 when signed) and then cvt that to
// floating-point.

// sint -> f32
// A signed i1 "true" is -1, so the predicate is widened with a -1/0 select
// before the signed cvt; selecting 1/0 here would yield +1.0 instead of -1.0.
def : Pat<(f32 (sint_to_fp Int1Regs:$a)),
          (CVT_f32_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f32 (sint_to_fp Int16Regs:$a)),
          (CVT_f32_s16 Int16Regs:$a, CvtRN)>;
def : Pat<(f32 (sint_to_fp Int32Regs:$a)),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(f32 (sint_to_fp Int64Regs:$a)),
          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;

// uint -> f32
// An unsigned i1 "true" is 1, so a 1/0 select feeds the unsigned cvt.
def : Pat<(f32 (uint_to_fp Int1Regs:$a)),
          (CVT_f32_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f32 (uint_to_fp Int16Regs:$a)),
          (CVT_f32_u16 Int16Regs:$a, CvtRN)>;
def : Pat<(f32 (uint_to_fp Int32Regs:$a)),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(f32 (uint_to_fp Int64Regs:$a)),
          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;

// sint -> f64
// Same signed-i1 handling as the f32 case: select -1/0, then signed cvt.
def : Pat<(f64 (sint_to_fp Int1Regs:$a)),
          (CVT_f64_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f64 (sint_to_fp Int16Regs:$a)),
          (CVT_f64_s16 Int16Regs:$a, CvtRN)>;
def : Pat<(f64 (sint_to_fp Int32Regs:$a)),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(f64 (sint_to_fp Int64Regs:$a)),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;

// uint -> f64
def : Pat<(f64 (uint_to_fp Int1Regs:$a)),
          (CVT_f64_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
def : Pat<(f64 (uint_to_fp Int16Regs:$a)),
          (CVT_f64_u16 Int16Regs:$a, CvtRN)>;
def : Pat<(f64 (uint_to_fp Int32Regs:$a)),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(f64 (uint_to_fp Int64Regs:$a)),
          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;


// f32 -> sint
// Every f32 source pattern below comes as a pair: the doF32FTZ-guarded form
// using the _FTZ conversion mode first, then the unguarded form using the
// plain mode.
//
// NOTE(review): the fp -> i1 patterns in this section compare the raw bits of
// $a against 0 with CmpEQ, i.e. they appear to produce "true" for a +0.0
// input.  That looks inverted for a float -> i1 conversion; left unchanged
// here, confirm the intended semantics.
def : Pat<(i1 (fp_to_sint Float32Regs:$a)),
          (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint Float32Regs:$a)),
          (CVT_s16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_sint Float32Regs:$a)),
          (CVT_s16_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint Float32Regs:$a)),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i32 (fp_to_sint Float32Regs:$a)),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint Float32Regs:$a)),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i64 (fp_to_sint Float32Regs:$a)),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;

// f32 -> uint
def : Pat<(i1 (fp_to_uint Float32Regs:$a)),
          (SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint Float32Regs:$a)),
          (CVT_u16_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_uint Float32Regs:$a)),
          (CVT_u16_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint Float32Regs:$a)),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i32 (fp_to_uint Float32Regs:$a)),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint Float32Regs:$a)),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i64 (fp_to_uint Float32Regs:$a)),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;

// f64 -> sint
def : Pat<(i1 (fp_to_sint Float64Regs:$a)),
          (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint Float64Regs:$a)),
          (CVT_s16_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint Float64Regs:$a)),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint Float64Regs:$a)),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;

// f64 -> uint
def : Pat<(i1 (fp_to_uint Float64Regs:$a)),
          (SETP_b64ri (BITCONVERT_64_F2I Float64Regs:$a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint Float64Regs:$a)),
          (CVT_u16_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint Float64Regs:$a)),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint Float64Regs:$a)),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;

// sext i1: true widens to -1 (all ones), false to 0.
def : Pat<(i16 (sext Int1Regs:$a)),
          (SELP_s16ii -1, 0, Int1Regs:$a)>;
def : Pat<(i32 (sext Int1Regs:$a)),
          (SELP_s32ii -1, 0, Int1Regs:$a)>;
def : Pat<(i64 (sext Int1Regs:$a)),
          (SELP_s64ii -1, 0, Int1Regs:$a)>;

// zext i1: true widens to 1, false to 0.
def : Pat<(i16 (zext Int1Regs:$a)),
          (SELP_u16ii 1, 0, Int1Regs:$a)>;
def : Pat<(i32 (zext Int1Regs:$a)),
          (SELP_u32ii 1, 0, Int1Regs:$a)>;
def : Pat<(i64 (zext Int1Regs:$a)),
          (SELP_u64ii 1, 0, Int1Regs:$a)>;

// anyext i1: the upper bits are unspecified; this lowering selects -1 / 0.
def : Pat<(i16 (anyext Int1Regs:$a)),
          (SELP_u16ii -1, 0, Int1Regs:$a)>;
def : Pat<(i32 (anyext Int1Regs:$a)),
          (SELP_u32ii -1, 0, Int1Regs:$a)>;
def : Pat<(i64 (anyext Int1Regs:$a)),
          (SELP_u64ii -1, 0, Int1Regs:$a)>;

// sext i16
def : Pat<(i32 (sext Int16Regs:$a)),
          (CVT_s32_s16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (sext Int16Regs:$a)),
          (CVT_s64_s16 Int16Regs:$a, CvtNONE)>;

// zext i16
def : Pat<(i32 (zext Int16Regs:$a)),
          (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (zext Int16Regs:$a)),
          (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;

// anyext i16 (lowered exactly like zext)
def : Pat<(i32 (anyext Int16Regs:$a)),
          (CVT_u32_u16 Int16Regs:$a, CvtNONE)>;
def : Pat<(i64 (anyext Int16Regs:$a)),
          (CVT_u64_u16 Int16Regs:$a, CvtNONE)>;

// sext i32
def : Pat<(i64 (sext Int32Regs:$a)),
          (CVT_s64_s32 Int32Regs:$a, CvtNONE)>;

// zext i32
def : Pat<(i64 (zext Int32Regs:$a)),
          (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;

// anyext i32 (lowered exactly like zext)
def : Pat<(i64 (anyext Int32Regs:$a)),
          (CVT_u64_u32 Int32Regs:$a, CvtNONE)>;


// truncate i64
def : Pat<(i32 (trunc Int64Regs:$a)),
          (CVT_u32_u64 Int64Regs:$a, CvtNONE)>;
def : Pat<(i16 (trunc Int64Regs:$a)),
          (CVT_u16_u64 Int64Regs:$a, CvtNONE)>;
def : Pat<(i1 (trunc Int64Regs:$a)),
          (SETP_b64ri (ANDb64ri
Int64Regs:$a, 1), 1, CmpEQ)>; 2501 2502// truncate i32 2503def : Pat<(i16 (trunc Int32Regs:$a)), 2504 (CVT_u16_u32 Int32Regs:$a, CvtNONE)>; 2505def : Pat<(i1 (trunc Int32Regs:$a)), 2506 (SETP_b32ri (ANDb32ri Int32Regs:$a, 1), 1, CmpEQ)>; 2507 2508// truncate i16 2509def : Pat<(i1 (trunc Int16Regs:$a)), 2510 (SETP_b16ri (ANDb16ri Int16Regs:$a, 1), 1, CmpEQ)>; 2511 2512// sext_inreg 2513def : Pat<(sext_inreg Int16Regs:$a, i8), (CVT_INREG_s16_s8 Int16Regs:$a)>; 2514def : Pat<(sext_inreg Int32Regs:$a, i8), (CVT_INREG_s32_s8 Int32Regs:$a)>; 2515def : Pat<(sext_inreg Int32Regs:$a, i16), (CVT_INREG_s32_s16 Int32Regs:$a)>; 2516def : Pat<(sext_inreg Int64Regs:$a, i8), (CVT_INREG_s64_s8 Int64Regs:$a)>; 2517def : Pat<(sext_inreg Int64Regs:$a, i16), (CVT_INREG_s64_s16 Int64Regs:$a)>; 2518def : Pat<(sext_inreg Int64Regs:$a, i32), (CVT_INREG_s64_s32 Int64Regs:$a)>; 2519 2520 2521// Select instructions with 32-bit predicates 2522def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b), 2523 (SELP_b16rr Int16Regs:$a, Int16Regs:$b, 2524 (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; 2525def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b), 2526 (SELP_b32rr Int32Regs:$a, Int32Regs:$b, 2527 (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; 2528def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b), 2529 (SELP_b64rr Int64Regs:$a, Int64Regs:$b, 2530 (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; 2531def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b), 2532 (SELP_f32rr Float32Regs:$a, Float32Regs:$b, 2533 (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; 2534def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b), 2535 (SELP_f64rr Float64Regs:$a, Float64Regs:$b, 2536 (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; 2537 2538 2539let hasSideEffects = 0 in { 2540 // pack a set of smaller int registers to a larger int register 2541 def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d), 2542 (ins Int16Regs:$s1, 
Int16Regs:$s2, 2543 Int16Regs:$s3, Int16Regs:$s4), 2544 "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};", []>; 2545 def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d), 2546 (ins Int16Regs:$s1, Int16Regs:$s2), 2547 "mov.b32\t$d, {{$s1, $s2}};", []>; 2548 def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d), 2549 (ins Int32Regs:$s1, Int32Regs:$s2), 2550 "mov.b64\t$d, {{$s1, $s2}};", []>; 2551 def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d), 2552 (ins Float32Regs:$s1, Float32Regs:$s2), 2553 "mov.b64\t$d, {{$s1, $s2}};", []>; 2554 2555 // unpack a larger int register to a set of smaller int registers 2556 def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2, 2557 Int16Regs:$d3, Int16Regs:$d4), 2558 (ins Int64Regs:$s), 2559 "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;", []>; 2560 def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2), 2561 (ins Int32Regs:$s), 2562 "mov.b32\t{{$d1, $d2}}, $s;", []>; 2563 def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2), 2564 (ins Int64Regs:$s), 2565 "mov.b64\t{{$d1, $d2}}, $s;", []>; 2566 def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2), 2567 (ins Float64Regs:$s), 2568 "mov.b64\t{{$d1, $d2}}, $s;", []>; 2569} 2570 2571// Count leading zeros 2572let hasSideEffects = 0 in { 2573 def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), 2574 "clz.b32\t$d, $a;", []>; 2575 def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 2576 "clz.b64\t$d, $a;", []>; 2577} 2578 2579// 32-bit has a direct PTX instruction 2580def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>; 2581 2582// For 64-bit, the result in PTX is actually 32-bit so we zero-extend 2583// to 64-bit to match the LLVM semantics 2584def : Pat<(ctlz Int64Regs:$a), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; 2585 2586// For 16-bit, we zero-extend to 32-bit, then trunc the result back 2587// to 16-bits (ctlz of a 16-bit value is guaranteed to require less 2588// than 16 bits to store). 
We also need to subtract 16 because the 2589// high-order 16 zeros were counted. 2590def : Pat<(ctlz Int16Regs:$a), 2591 (SUBi16ri (CVT_u16_u32 (CLZr32 2592 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 2593 CvtNONE), 16)>; 2594 2595// Population count 2596let hasSideEffects = 0 in { 2597 def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a), 2598 "popc.b32\t$d, $a;", []>; 2599 def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 2600 "popc.b64\t$d, $a;", []>; 2601} 2602 2603// 32-bit has a direct PTX instruction 2604def : Pat<(ctpop Int32Regs:$a), (POPCr32 Int32Regs:$a)>; 2605 2606// For 64-bit, the result in PTX is actually 32-bit so we zero-extend 2607// to 64-bit to match the LLVM semantics 2608def : Pat<(ctpop Int64Regs:$a), (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>; 2609 2610// For 16-bit, we zero-extend to 32-bit, then trunc the result back 2611// to 16-bits (ctpop of a 16-bit value is guaranteed to require less 2612// than 16 bits to store) 2613def : Pat<(ctpop Int16Regs:$a), 2614 (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>; 2615 2616// fround f64 -> f32 2617def : Pat<(f32 (fround Float64Regs:$a)), 2618 (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>; 2619def : Pat<(f32 (fround Float64Regs:$a)), 2620 (CVT_f32_f64 Float64Regs:$a, CvtRN)>; 2621 2622// fextend f32 -> f64 2623def : Pat<(f64 (fextend Float32Regs:$a)), 2624 (CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; 2625def : Pat<(f64 (fextend Float32Regs:$a)), 2626 (CVT_f64_f32 Float32Regs:$a, CvtNONE)>; 2627 2628def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone, 2629 [SDNPHasChain, SDNPOptInGlue]>; 2630 2631//----------------------------------- 2632// Control-flow 2633//----------------------------------- 2634 2635let isTerminator=1 in { 2636 let isReturn=1, isBarrier=1 in 2637 def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>; 2638 2639 let isBranch=1 in 2640 def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, 
brtarget:$target), 2641 "@$a bra \t$target;", 2642 [(brcond Int1Regs:$a, bb:$target)]>; 2643 let isBranch=1 in 2644 def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target), 2645 "@!$a bra \t$target;", []>; 2646 2647 let isBranch=1, isBarrier=1 in 2648 def GOTO : NVPTXInst<(outs), (ins brtarget:$target), 2649 "bra.uni \t$target;", [(br bb:$target)]>; 2650} 2651 2652def : Pat<(brcond Int32Regs:$a, bb:$target), 2653 (CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>; 2654 2655// SelectionDAGBuilder::visitSWitchCase() will invert the condition of a 2656// conditional branch if the target block is the next block so that the code 2657// can fall through to the target block. The invertion is done by 'xor 2658// condition, 1', which will be translated to (setne condition, -1). Since ptx 2659// supports '@!pred bra target', we should use it. 2660def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target), 2661 (CBranchOther Int1Regs:$a, bb:$target)>; 2662 2663// Call 2664def SDT_NVPTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; 2665def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; 2666 2667def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart, 2668 [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; 2669def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd, 2670 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, 2671 SDNPSideEffect]>; 2672 2673def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; 2674def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall, 2675 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; 2676def calltarget : Operand<i32>; 2677let isCall=1 in { 2678 def CALL : NVPTXInst<(outs), (ins calltarget:$dst), "call \t$dst, (1);", []>; 2679} 2680 2681def : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>; 2682def : Pat<(call texternalsym:$dst), (CALL texternalsym:$dst)>; 2683 2684// Pseudo instructions. 
// Base class for pseudo instructions.  It adds nothing of its own: all four
// arguments are forwarded to NVPTXInst unchanged.
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
    : NVPTXInst<outs, ins, asmstr, pattern>;

// Call-sequence start marker.  Prints an opening brace annotated with $amt,
// followed by a .reg declaration of temp_param_reg.
def Callseq_Start : NVPTXInst<
    (outs),
    (ins i32imm:$amt),
    "\\{ // callseq $amt\n"
    "\t.reg .b32 temp_param_reg;",
    [(callseq_start timm:$amt)]>;

// Call-sequence end marker.  Prints the matching closing brace; only $amt1
// appears in the emitted text.
def Callseq_End : NVPTXInst<
    (outs),
    (ins i32imm:$amt1, i32imm:$amt2),
    "\\} // callseq $amt1",
    [(callseq_end timm:$amt1, timm:$amt2)]>;

// trap instruction
def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>;

// Call prototype wrapper: a chained, glued node taking one integer operand.
def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def CallPrototype : SDNode<
    "NVPTXISD::CallPrototype", SDTCallPrototype,
    [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

// Operand carrying the prototype identifier; printed via printProtoIdent.
def ProtoIdent : Operand<i32> {
  let PrintMethod = "printProtoIdent";
}

def CALL_PROTOTYPE : NVPTXInst<
    (outs),
    (ins ProtoIdent:$ident),
    "$ident",
    [(CallPrototype (i32 texternalsym:$ident))]>;


include "NVPTXIntrinsics.td"


//-----------------------------------
// Notes
//-----------------------------------
// BSWAP is currently expanded. The following would be more efficient:
// - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit
//   registers
// - for sm_20, use prmt (spelled "pmpt" in the original note), with vector
//   scalar mov to do the pack and unpack. sm_20 supports native 32-bit
//   registers, but not native 16-bit registers.