//===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the PTX instructions in TableGen format.
//
//===----------------------------------------------------------------------===//

include "NVPTXInstrFormats.td"

// A NOP instruction.
// It has no operands, an empty assembly string and no selection pattern, so
// it prints nothing and is never chosen by pattern matching.
def NOP : NVPTXInst<(outs), (ins), "", []>;

// List of vector specific properties.
// Each def tags an instruction with the kind of vector operation it performs.
// The numeric values are part of the encoding: 1-7 are assigned sequentially
// and isVecOther is deliberately 15, so do not renumber them.
// NOTE(review): VecInstTypeEnum is declared outside this file (presumably in
// NVPTXInstrFormats.td) -- confirm how the value is consumed before changing.
def isVecLD      : VecInstTypeEnum<1>;
def isVecST      : VecInstTypeEnum<2>;
def isVecBuild   : VecInstTypeEnum<3>;
def isVecShuffle : VecInstTypeEnum<4>;
def isVecExtract : VecInstTypeEnum<5>;
def isVecInsert  : VecInstTypeEnum<6>;
def isVecDest    : VecInstTypeEnum<7>;
def isVecOther   : VecInstTypeEnum<15>;

//===----------------------------------------------------------------------===//
// NVPTX Operand Definitions.
//===----------------------------------------------------------------------===//

// Branch target operand; typed OtherVT because it carries no value type.
def brtarget    : Operand<OtherVT>;

//===----------------------------------------------------------------------===//
// NVPTX Instruction Predicate Definitions
//===----------------------------------------------------------------------===//

// Every Predicate below wraps a C++ boolean expression given as a string.
// An instruction or pattern that lists the predicate in Requires<[...]> is
// only eligible when that expression is true.  The strings are code: they
// must name symbols visible to the generated instruction selector.

// Subtarget capability queries.
def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;
def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
// True when the generic 32-bit form is missing but the global form exists.
def useAtomRedG32forGen32 :
  Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
// True when the generic 64-bit form is missing but the global form exists.
def useAtomRedG64forGen64 :
  Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
def hasVote : Predicate<"Subtarget.hasVote()">;
def hasDouble : Predicate<"Subtarget.hasDouble()">;
def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
def hasLDG : Predicate<"Subtarget.hasLDG()">;
def hasLDU : Predicate<"Subtarget.hasLDU()">;
def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;

// Selects the ".ftz" (flush-to-zero) f32 instruction variants.
def doF32FTZ : Predicate<"UseF32FTZ">;

// FMA / FMAD contraction switches.  The *_ftz forms additionally require
// UseF32FTZ.
def doFMAF32      : Predicate<"doFMAF32">;
def doFMAF32_ftz  : Predicate<"(doFMAF32 && UseF32FTZ)">;
def doFMAF32AGG      : Predicate<"doFMAF32AGG">;
def doFMAF32AGG_ftz  : Predicate<"(doFMAF32AGG && UseF32FTZ)">;
def doFMAF64      : Predicate<"doFMAF64">;
def doFMAF64AGG      : Predicate<"doFMAF64AGG">;
def doFMADF32     : Predicate<"doFMADF32">;
def doFMADF32_ftz : Predicate<"(doFMADF32 && UseF32FTZ)">;

// Enables the mul.wide selection patterns.
def doMulWide      : Predicate<"doMulWide">;

def allowFMA : Predicate<"allowFMA">;
def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;

// f32 division precision: 0 gates the div.approx/rcp.approx instructions and
// 1 gates the div.full ones.  Any other value matches neither predicate.
def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;

def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;

// Predicate that always holds.
def true : Predicate<"1">;

//===----------------------------------------------------------------------===//
// Special Handling for 8-bit Operands and Operations
//
// PTX supports 8-bit signed and unsigned types, but does not support 8-bit
// operations (like add, shift, etc) except for ld/st/cvt. SASS does not have
// 8-bit registers.
//
// PTX ld, st and cvt instructions permit source and destination data operands
// to be wider than the instruction-type size, so that narrow values may be
// loaded, stored, and converted using regular-width registers.
//
// So in PTX generation, we
// - always use 16-bit registers in place of 8-bit registers.
//   (8-bit variables should stay as 8-bit as they represent memory layout.)
// - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values
//   before operation
//   . div
//   . rem
//   . neg (sign)
//   . set, setp
//   . shr
//
// We are patching the operations by inserting the cvt instructions in the
// asm strings of the affected instructions.
//
// Since vector operations, except for ld/st, are eventually elementized, we
// do not need to special-handle the vector 8-bit operations.
//
//
//===----------------------------------------------------------------------===//

// Build the asm string for an 8-bit register/register operation.
// Both operands are widened with CVTStr into scratch registers of type
// TypeStr, then the 16-bit form of the operation is applied.  "{{" and "}}"
// are the escaped braces of the PTX scope that holds the scratch registers.
//
// Generate string block like
// {
//   .reg .s16 %temp1;
//   .reg .s16 %temp2;
//   cvt.s16.s8 %temp1, $a;
//   cvt.s16.s8 %temp2, $b;
//   opc.s16    $dst, %temp1, %temp2;
// }
// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
class Handle_i8rr<string OpcStr, string TypeStr, string CVTStr> {
  string s = !strconcat("{{\n\t",
             !strconcat(".reg .", !strconcat(TypeStr,
             !strconcat(" \t%temp1;\n\t",
             !strconcat(".reg .", !strconcat(TypeStr,
             !strconcat(" \t%temp2;\n\t",
             !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
             !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))));
}

// Build the asm string for an 8-bit register/immediate operation.
// The immediate $b is first moved into %temp2 and then converted in place,
// because it has to be in a register before CVTStr can be applied to it.
//
// Generate string block like
// {
//   .reg .s16 %temp1;
//   .reg .s16 %temp2;
//   cvt.s16.s8 %temp1, $a;
//   mov.b16    %temp2, $b;
//   cvt.s16.s8 %temp2, %temp2;
//   opc.s16    $dst, %temp1, %temp2;
// }
// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
class Handle_i8ri<string OpcStr, string TypeStr, string CVTStr> {
  string s = !strconcat("{{\n\t",
             !strconcat(".reg .", !strconcat(TypeStr,
             !strconcat(" \t%temp1;\n\t",
             !strconcat(".reg .",
             !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t",
             !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",
             !strconcat("mov.b16 \t%temp2, $b;\n\t",
             !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t",
             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
}

// Build the asm string for an 8-bit immediate/register operation.
// Mirror image of Handle_i8ri: here the FIRST operand $a is the immediate
// that is moved into %temp1 and converted in place; $b is converted directly.
//
// Generate string block like
// {
//   .reg .s16 %temp1;
//   .reg .s16 %temp2;
//   mov.b16    %temp1, $a;
//   cvt.s16.s8 %temp1, %temp1;
//   cvt.s16.s8 %temp2, $b;
//   opc.s16    $dst, %temp1, %temp2;
// }
// when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8
class Handle_i8ir<string OpcStr, string TypeStr, string CVTStr> {
  string s = !strconcat("{{\n\t",
             !strconcat(".reg .", !strconcat(TypeStr,
             !strconcat(" \t%temp1;\n\t",
             !strconcat(".reg .", !strconcat(TypeStr,
             !strconcat(" \t%temp2;\n\t",
             !strconcat("mov.b16 \t%temp1, $a;\n\t",
             !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t",
             !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",
             !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));
}


//===----------------------------------------------------------------------===//
// Some Common Instruction Class Templates
//===----------------------------------------------------------------------===//

// Two-operand integer instruction for i64/i32/i16/i8, each in a reg/reg (rr)
// and a reg/imm (ri) flavour.  OpcStr is the mnemonic up to the bit width
// (e.g. "add.s"); the width is appended here.  The i8 variants emit the
// 16-bit instruction directly, with no widening conversion, so this template
// is only for operations that do not need the 8-bit operands extended first.
multiclass I3<string OpcStr, SDNode OpNode> {
  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
                       Int64Regs:$b))]>;
  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
                       Int32Regs:$b))]>;
  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
                       Int16Regs:$b))]>;
  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
  def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
  def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
}

// Same as I3 for i64/i32/i16, but the i8 variants go through Handle_i8rr /
// Handle_i8ri so both operands are extended to TypeStr with CVTStr before the
// 16-bit operation is applied.
multiclass I3_i8<string OpcStr, SDNode OpNode, string TypeStr, string CVTStr> {
  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
                       Int64Regs:$b))]>;
  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
                       Int32Regs:$b))]>;
  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
                       Int16Regs:$b))]>;
  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
  def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
                     Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
  def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
                     Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
                     [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
}

// Same as I3 but without any i8 variants; users must define the 8-bit
// instructions themselves.
multiclass I3_noi8<string OpcStr, SDNode OpNode> {
  def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
                       Int64Regs:$b))]>;
  def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
                     !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                     [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
  def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
                       Int32Regs:$b))]>;
  def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                     !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                     [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
  def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
                       Int16Regs:$b))]>;
  def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
                     !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                     [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
}

// 32-bit only add/sub family (rr and ri).  Unlike I3, ".s32" is appended
// whole, so OpcStr is passed without the trailing type letter
// (e.g. "add.cc").
multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
      Int32Regs:$b),
                      !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
                        Int32Regs:$b))]>;
   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                      !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
}

// Two-operand floating point instruction WITHOUT a rounding modifier, for
// f64 and f32 (plain and .ftz), rr and ri.  Every variant is gated on
// allowFMA (or allowFMA_ftz); compare F3_rn below, whose non-ftz variants are
// unconditional.  The _ftz defs are listed before the plain f32 defs; keep
// that order.
multiclass F3<string OpcStr, SDNode OpNode> {
   def f64rr : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, Float64Regs:$b),
                      !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
                      [(set Float64Regs:$dst,
                        (OpNode Float64Regs:$a, Float64Regs:$b))]>,
                      Requires<[allowFMA]>;
   def f64ri : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, f64imm:$b),
                      !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
                      [(set Float64Regs:$dst,
                        (OpNode Float64Regs:$a, fpimm:$b))]>,
                      Requires<[allowFMA]>;
   def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
                      [(set Float32Regs:$dst,
                        (OpNode Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[allowFMA_ftz]>;
   def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
                      [(set Float32Regs:$dst,
                        (OpNode Float32Regs:$a, fpimm:$b))]>,
                      Requires<[allowFMA_ftz]>;
   def f32rr : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
                      [(set Float32Regs:$dst,
                        (OpNode Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[allowFMA]>;
   def f32ri : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
                      [(set Float32Regs:$dst,
                        (OpNode Float32Regs:$a, fpimm:$b))]>,
                      Requires<[allowFMA]>;
}

// Two-operand floating point instruction with the ".rn" rounding modifier.
// Only the .ftz variants carry a predicate (doF32FTZ); the f64 and plain f32
// variants are always available.
multiclass F3_rn<string OpcStr, SDNode OpNode> {
   def f64rr : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, Float64Regs:$b),
                      !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
                      [(set Float64Regs:$dst,
                        (OpNode Float64Regs:$a, Float64Regs:$b))]>;
   def f64ri : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, f64imm:$b),
                      !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
                      [(set Float64Regs:$dst,
                        (OpNode Float64Regs:$a, fpimm:$b))]>;
   def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
                      [(set Float32Regs:$dst,
                        (OpNode Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[doF32FTZ]>;
   def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
                      [(set Float32Regs:$dst,
                        (OpNode Float32Regs:$a, fpimm:$b))]>,
                      Requires<[doF32FTZ]>;
   def f32rr : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
                      [(set Float32Regs:$dst,
                        (OpNode Float32Regs:$a, Float32Regs:$b))]>;
   def f32ri : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
                      [(set Float32Regs:$dst,
                        (OpNode Float32Regs:$a, fpimm:$b))]>;
}

// One-operand floating point instruction for f64, f32.ftz (requires
// doF32FTZ) and plain f32.
multiclass F2<string OpcStr, SDNode OpNode> {
   def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
                      !strconcat(OpcStr, ".f64 \t$dst, $a;"),
                      [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
   def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
                      !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
                      [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
                      Requires<[doF32FTZ]>;
   def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
                      !strconcat(OpcStr, ".f32 \t$dst, $a;"),
                      [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
}

//===----------------------------------------------------------------------===//
// NVPTX Instructions.
//===----------------------------------------------------------------------===//

//-----------------------------------
// Integer Arithmetic
//-----------------------------------

// 1-bit add and sub.  Both are emitted as xor.pred on predicate registers,
// since addition and subtraction modulo 2 are the same operation as xor.
multiclass ADD_SUB_i1<SDNode OpNode> {
   def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
          "xor.pred \t$dst, $a, $b;",
      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
   def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
          "xor.pred \t$dst, $a, $b;",
      [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
}

defm ADD_i1 : ADD_SUB_i1<add>;
defm SUB_i1 : ADD_SUB_i1<sub>;


defm ADD : I3<"add.s", add>;
defm SUB : I3<"sub.s", sub>;

// Carry-out producing add/sub.
defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;

// Carry-in consuming (and carry-out producing) add/sub.
defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;

//mul.wide PTX instruction

// Immediate that fits in a signed 32-bit integer.
def SInt32Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.isSignedIntN(32);
}]>;

// Immediate that fits in an unsigned 32-bit integer.
def UInt32Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.isIntN(32);
}]>;

// Immediate that fits in a signed 16-bit integer.
def SInt16Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.isSignedIntN(16);
}]>;

// Immediate that fits in an unsigned 16-bit integer.
def UInt16Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.isIntN(16);
}]>;

// Shift amount in [0, 32): (1 << v) is representable as an UNSIGNED i32.
def Int5Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  // Check if 0 <= v < 32
  // Only then the result from (x << v) will be i32
  return v.sge(0) && v.slt(32);
}]>;

// Shift amount in [0, 16): (1 << v) is representable as an UNSIGNED i16.
def Int4Const : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  // Check if 0 <= v < 16
  // Only then the result from (x << v) will be i16
  return v.sge(0) && v.slt(16);
}]>;

// Shift amount in [0, 31): (1 << v) is a POSITIVE signed i32.
// Needed for the signed mul.wide rewrite: with v == 31 the multiplier built
// by SHL2MUL32 is 0x80000000, i.e. -2^31 as a signed value, which would make
// mul.wide.s32 compute -(x << 31) instead of (x << 31).
def IntConst_0_30 : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.sge(0) && v.slt(31);
}]>;

// Shift amount in [0, 15): (1 << v) is a POSITIVE signed i16.
// Same reasoning as IntConst_0_30, for mul.wide.s16 and SHL2MUL16.
def IntConst_0_14 : PatLeaf<(imm), [{
  const APInt &v = N->getAPIntValue();
  return v.sge(0) && v.slt(15);
}]>;

// Turn a shift amount v into the equivalent multiplier (1 << v), as an i32
// target constant.
def SHL2MUL32 : SDNodeXForm<imm, [{
  const APInt &v = N->getAPIntValue();
  APInt temp(32, 1);
  return CurDAG->getTargetConstant(temp.shl(v), MVT::i32);
}]>;

// Turn a shift amount v into the equivalent multiplier (1 << v), as an i16
// target constant.
def SHL2MUL16 : SDNodeXForm<imm, [{
  const APInt &v = N->getAPIntValue();
  APInt temp(16, 1);
  return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
}]>;

// The mul.wide instructions carry no patterns of their own; they are only
// selected through the explicit Pat<> rules further down.
def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst),
                           (ins Int32Regs:$a, Int32Regs:$b),
                           "mul.wide.s32 \t$dst, $a, $b;", []>;
def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst),
                            (ins Int32Regs:$a, i64imm:$b),
                           "mul.wide.s32 \t$dst, $a, $b;", []>;

def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst),
                           (ins Int32Regs:$a, Int32Regs:$b),
                           "mul.wide.u32 \t$dst, $a, $b;", []>;
def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst),
                            (ins Int32Regs:$a, i64imm:$b),
                           "mul.wide.u32 \t$dst, $a, $b;", []>;

def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst),
                            (ins Int16Regs:$a, Int16Regs:$b),
                           "mul.wide.s16 \t$dst, $a, $b;", []>;
def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst),
                            (ins Int16Regs:$a, i32imm:$b),
                           "mul.wide.s16 \t$dst, $a, $b;", []>;

def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst),
                            (ins Int16Regs:$a, Int16Regs:$b),
                           "mul.wide.u16 \t$dst, $a, $b;", []>;
def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst),
                            (ins Int16Regs:$a, i32imm:$b),
                           "mul.wide.u16 \t$dst, $a, $b;", []>;

// "extend, then shift left by an immediate" ==> mul.wide by (1 << imm).
// The signed forms use the tighter IntConst_0_30 / IntConst_0_14 leaves so
// the multiplier never has its sign bit set; a shift by 31 (resp. 15) simply
// falls back to the ordinary extend + shl selection.  The unsigned forms keep
// the original full range.
def : Pat<(shl (sext Int32Regs:$a), (i32 IntConst_0_30:$b)),
          (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
          Requires<[doMulWide]>;
def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
          (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
          Requires<[doMulWide]>;

def : Pat<(shl (sext Int16Regs:$a), (i16 IntConst_0_14:$b)),
          (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
          Requires<[doMulWide]>;
def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
          (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
          Requires<[doMulWide]>;

// "extend both operands, then multiply" ==> mul.wide.  The immediate forms
// only match when the constant fits the narrow source type.
def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
          (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
          Requires<[doMulWide]>;
def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
          (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>,
          Requires<[doMulWide]>;

def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
          (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>;
def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
          (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>,
          Requires<[doMulWide]>;

def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
          (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
          (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>,
          Requires<[doMulWide]>;

def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
          (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
          (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>,
          Requires<[doMulWide]>;

defm MULT : I3<"mul.lo.s", mul>;

defm MULTHS : I3_noi8<"mul.hi.s", mulhs>;
defm MULTHU : I3_noi8<"mul.hi.u", mulhu>;

// 8-bit "multiply high".  Both operands are widened to 16 bits, multiplied
// with mul.lo (the full 8x8 product fits in 16 bits), and the product is
// shifted right by 8 to leave its upper half in $dst.  The ri forms first
// move the immediate into a scratch register so it can be converted.
def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
            !strconcat("{{ \n\t",
            !strconcat(".reg \t.s16 temp1; \n\t",
            !strconcat(".reg \t.s16 temp2; \n\t",
            !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
            !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t",
            !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
            !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
            !strconcat("}}", "")))))))),
      [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>;
def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
            !strconcat("{{ \n\t",
            !strconcat(".reg \t.s16 temp1; \n\t",
            !strconcat(".reg \t.s16 temp2; \n\t",
            !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
            !strconcat("mov.b16 \ttemp2, $b; \n\t",
            !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t",
            !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
            !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",
            !strconcat("}}", ""))))))))),
      [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>;
def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
            !strconcat("{{ \n\t",
            !strconcat(".reg \t.u16 temp1; \n\t",
            !strconcat(".reg \t.u16 temp2; \n\t",
            !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
            !strconcat("cvt.u16.u8 \ttemp2, $b; \n\t",
            !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
            !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
            !strconcat("}}", "")))))))),
      [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>;
def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
            !strconcat("{{ \n\t",
            !strconcat(".reg \t.u16 temp1; \n\t",
            !strconcat(".reg \t.u16 temp2; \n\t",
            !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
            !strconcat("mov.b16 \ttemp2, $b; \n\t",
            !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t",
            !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
            !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",
            !strconcat("}}", ""))))))))),
      [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>;


// Division and remainder need correctly extended 8-bit operands, hence I3_i8.
defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">;
defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">;

defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">;
// The ri version will not be selected as DAGCombiner::visitSREM will lower it.
defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">;
// The ri version will not be selected as DAGCombiner::visitUREM will lower it.

// Integer multiply-add: $dst = $a * $b + $c, matched from (add (mul a, b), c).
// Each width has four operand shapes, named by the kinds of $a, $b, $c:
// rrr, rri, rir and rii (r = register, i = immediate).
// The 8-bit forms emit the 16-bit instruction on the operands as they are.
def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst),
                      (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
                        Int8Regs:$c))]>;
def MAD8rri : NVPTXInst<(outs Int8Regs:$dst),
                      (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
                        imm:$c))]>;
def MAD8rir : NVPTXInst<(outs Int8Regs:$dst),
                      (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
                        Int8Regs:$c))]>;
def MAD8rii : NVPTXInst<(outs Int8Regs:$dst),
                      (ins Int8Regs:$a, i8imm:$b, i8imm:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
                        imm:$c))]>;

def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
                      (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int16Regs:$dst, (add
                        (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>;
def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
                      (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int16Regs:$dst, (add
                        (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>;
def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
                      (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int16Regs:$dst, (add
                        (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>;
def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
    (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
                      "mad.lo.s16 \t$dst, $a, $b, $c;",
                      [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b),
                        imm:$c))]>;

def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
                      (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
                      "mad.lo.s32 \t$dst, $a, $b, $c;",
                      [(set Int32Regs:$dst, (add
                        (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>;
def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
                      (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
                      "mad.lo.s32 \t$dst, $a, $b, $c;",
                      [(set Int32Regs:$dst, (add
                        (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>;
def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
                      (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
                      "mad.lo.s32 \t$dst, $a, $b, $c;",
                      [(set Int32Regs:$dst, (add
                        (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>;
def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
                      (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
                      "mad.lo.s32 \t$dst, $a, $b, $c;",
                      [(set Int32Regs:$dst, (add
                        (mul Int32Regs:$a, imm:$b), imm:$c))]>;

def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
                      (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
                      "mad.lo.s64 \t$dst, $a, $b, $c;",
                      [(set Int64Regs:$dst, (add
                        (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>;
def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
                      (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
                      "mad.lo.s64 \t$dst, $a, $b, $c;",
                      [(set Int64Regs:$dst, (add
                        (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>;
def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
                      (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
                      "mad.lo.s64 \t$dst, $a, $b, $c;",
                      [(set Int64Regs:$dst, (add
                        (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>;
def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
                      (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
                      "mad.lo.s64 \t$dst, $a, $b, $c;",
                      [(set Int64Regs:$dst, (add
                        (mul Int64Regs:$a, imm:$b), imm:$c))]>;


// Integer negation.  The 8-bit form first sign-extends $src into $dst and
// then negates $dst in place, so it expands to two PTX instructions.
def INEG8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
                     !strconcat("cvt.s16.s8 \t$dst, $src;\n\t",
                                 "neg.s16 \t$dst, $dst;"),
         [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>;
def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
                     "neg.s16 \t$dst, $src;",
         [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
                     "neg.s32 \t$dst, $src;",
         [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
                     "neg.s64 \t$dst, $src;",
         [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;

//-----------------------------------
// Floating Point Arithmetic
//-----------------------------------

// Constant 1.0f
// Matches only an fp immediate with IEEE single semantics whose value is 1.0.
def FloatConst1 : PatLeaf<(fpimm), [{
    if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
      return false;
    float f = (float)N->getValueAPF().convertToFloat();
    return (f==1.0f);
}]>;
// Constant (double)1.0
// Matches only an fp immediate with IEEE double semantics whose value is 1.0.
def DoubleConst1 : PatLeaf<(fpimm), [{
    if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
      return false;
    double d = (double)N->getValueAPF().convertToDouble();
    return (d==1.0);
}]>;

// Variants without a rounding modifier (gated on allowFMA inside F3).
defm FADD : F3<"add", fadd>;
defm FSUB : F3<"sub", fsub>;
defm FMUL : F3<"mul", fmul>;

// Variants with the explicit ".rn" rounding modifier.
defm FADD_rn : F3_rn<"add", fadd>;
defm FSUB_rn : F3_rn<"sub", fsub>;
defm FMUL_rn : F3_rn<"mul", fmul>;

defm FABS : F2<"abs", fabs>;
defm FNEG : F2<"neg", fneg>;
defm FSQRT : F2<"sqrt.rn", fsqrt>;

// NOTE on the division definitions below: several of them match the same
// DAG (e.g. fdiv 1.0, b) and differ only in their Requires<> predicates, and
// the "accurate" forms have no do_DIVF32_* predicate at all.
// NOTE(review): which definition wins when more than one is eligible
// presumably depends on definition order -- confirm before reordering.

//
// F64 division
//
// 1.0 / b is emitted as a reciprocal; the $a operand is matched but not
// printed.
def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
                      (ins f64imm:$a, Float64Regs:$b),
                      "rcp.rn.f64 \t$dst, $b;",
                      [(set Float64Regs:$dst,
                        (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, Float64Regs:$b),
                      "div.rn.f64 \t$dst, $a, $b;",
                      [(set Float64Regs:$dst,
                        (fdiv Float64Regs:$a, Float64Regs:$b))]>;
def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, f64imm:$b),
                      "div.rn.f64 \t$dst, $a, $b;",
                      [(set Float64Regs:$dst,
                        (fdiv Float64Regs:$a, fpimm:$b))]>;

//
// F32 Approximate reciprocal
//
def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins f32imm:$a, Float32Regs:$b),
                      "rcp.approx.ftz.f32 \t$dst, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
                      Requires<[do_DIVF32_APPROX, doF32FTZ]>;
def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
                        (ins f32imm:$a, Float32Regs:$b),
                       "rcp.approx.f32 \t$dst, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
                      Requires<[do_DIVF32_APPROX]>;
//
// F32 Approximate division
//
def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      "div.approx.ftz.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[do_DIVF32_APPROX, doF32FTZ]>;
def FDIV32approxrr : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      "div.approx.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[do_DIVF32_APPROX]>;
//
// F32 Semi-accurate reciprocal
//
// rcp.approx gives the same result as div.full(1.0f, a) and is faster.
//
def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins f32imm:$a, Float32Regs:$b),
                      "rcp.approx.ftz.f32 \t$dst, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
                      (ins f32imm:$a, Float32Regs:$b),
                      "rcp.approx.f32 \t$dst, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
                      Requires<[do_DIVF32_FULL]>;
//
// F32 Semi-accurate division
//
def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      "div.full.ftz.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      "div.full.ftz.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, fpimm:$b))]>,
                      Requires<[do_DIVF32_FULL, doF32FTZ]>;
def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      "div.full.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[do_DIVF32_FULL]>;
def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      "div.full.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, fpimm:$b))]>,
                      Requires<[do_DIVF32_FULL]>;
//
// F32 Accurate reciprocal
//
def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins f32imm:$a, Float32Regs:$b),
                      "rcp.rn.ftz.f32 \t$dst, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
                      Requires<[reqPTX20, doF32FTZ]>;
def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
                      (ins f32imm:$a, Float32Regs:$b),
                      "rcp.rn.f32 \t$dst, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv FloatConst1:$a, Float32Regs:$b))]>,
                      Requires<[reqPTX20]>;
//
// F32 Accurate division
//
def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      "div.rn.ftz.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[doF32FTZ, reqPTX20]>;
def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      "div.rn.ftz.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, fpimm:$b))]>,
                      Requires<[doF32FTZ, reqPTX20]>;
def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b),
                      "div.rn.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, Float32Regs:$b))]>,
                      Requires<[reqPTX20]>;
def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b),
                      "div.rn.f32 \t$dst, $a, $b;",
                      [(set Float32Regs:$dst,
                        (fdiv Float32Regs:$a, fpimm:$b))]>,
                      Requires<[reqPTX20]>;


// f32 contraction of (fadd (fmul a, b), c) into one three-operand
// instruction.  OpcStr is the complete mnemonic, since nothing but the
// operand list is appended; Pred gates every variant.
multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
   def rrr : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst, (fadd
                        (fmul Float32Regs:$a, Float32Regs:$b),
                        Float32Regs:$c))]>, Requires<[Pred]>;
   // This is to WAR a weird bug in Tablegen that does not automatically
   // generate the following permuted rule rrr2 from the above rrr.
   // So we explicitly add it here. This happens to FMA32 only.
   // See the comments at FMAD32 and FMA32 for more information.
   def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
                        (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst, (fadd Float32Regs:$c,
                        (fmul Float32Regs:$a, Float32Regs:$b)))]>,
                      Requires<[Pred]>;
   def rri : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst, (fadd
                        (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>,
                      Requires<[Pred]>;
   def rir : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst, (fadd
                        (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>,
                      Requires<[Pred]>;
   def rii : NVPTXInst<(outs Float32Regs:$dst),
                      (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float32Regs:$dst, (fadd
                        (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>,
                      Requires<[Pred]>;
}

// f64 counterpart of FPCONTRACT32.  It has no explicit rrr2 variant.
multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
   def rrr : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float64Regs:$dst, (fadd
                        (fmul Float64Regs:$a, Float64Regs:$b),
                        Float64Regs:$c))]>, Requires<[Pred]>;
   def rri : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a,
                        Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>;
   def rir : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float64Regs:$dst, (fadd
                        (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>,
                      Requires<[Pred]>;
   def rii : NVPTXInst<(outs Float64Regs:$dst),
                      (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
                      !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
                      [(set Float64Regs:$dst, (fadd
                        (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>,
                      Requires<[Pred]>;
}

// Due to an unknown reason (most likely a bug in tablegen), tablegen does not
// automatically generate the rrr2 rule from
// the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32.
// If we reverse the order of the following two lines, then rrr2 rule will be
// generated for FMA32, but not for rrr.
// Therefore, we manually write the rrr2 rule in FPCONTRACT32.
defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>;
defm FMAD32     : FPCONTRACT32<"mad.f32", doFMADF32>;
defm FMA32_ftz  : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
defm FMA32      : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
defm FMA64      : FPCONTRACT64<"fma.rn.f64", doFMAF64>;

// Subtraction folding for the f32 "mad" instructions.
// Only one direction is provided here:
//   x*y - acc  =>  Inst(x, y, -acc)
multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
  def : Pat<(fsub (fmul Float32Regs:$x, Float32Regs:$y), Float32Regs:$acc),
            (Inst Float32Regs:$x, Float32Regs:$y, (FNEGf32 Float32Regs:$acc))>,
        Requires<[Pred]>;
}

// Subtraction folding for f32 fused multiply-add.
//   acc - x*y  =>  Inst(-x, y, acc)
//     legal because acc - x*y <=> acc + (-(x*y)) <=> acc + (-x)*y
//   x*y - acc  =>  Inst(x, y, -acc)
//     legal because x*y - acc <=> x*y + (-acc)
multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  def : Pat<(fsub Float32Regs:$acc, (fmul Float32Regs:$x, Float32Regs:$y)),
            (Inst (FNEGf32 Float32Regs:$x), Float32Regs:$y, Float32Regs:$acc)>,
        Requires<[Pred]>;
  def : Pat<(fsub (fmul Float32Regs:$x, Float32Regs:$y), Float32Regs:$acc),
            (Inst Float32Regs:$x, Float32Regs:$y, (FNEGf32 Float32Regs:$acc))>,
        Requires<[Pred]>;
}

// Subtraction folding for f64 fused multiply-add.
//   acc - x*y  =>  Inst(-x, y, acc)
//   x*y - acc  =>  Inst(x, y, -acc)
multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
  def : Pat<(fsub Float64Regs:$acc, (fmul Float64Regs:$x, Float64Regs:$y)),
            (Inst (FNEGf64 Float64Regs:$x), Float64Regs:$y, Float64Regs:$acc)>,
        Requires<[Pred]>;

  def : Pat<(fsub (fmul Float64Regs:$x, Float64Regs:$y), Float64Regs:$acc),
            (Inst Float64Regs:$x, Float64Regs:$y, (FNEGf64 Float64Regs:$acc))>,
        Requires<[Pred]>;
}

defm FMAF32ext_ftz  : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
defm FMAF32ext      : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD<FMAD32_ftzrrr, doFMADF32_ftz>;
defm FMADF32ext     : FPCONTRACT32_SUB_PAT_MAD<FMAD32rrr, doFMADF32>;
defm FMAF64ext      : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;

// f32 sine / cosine, selected from fsin / fcos and emitted as the PTX
// ".approx" instructions.
def SINF : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
                     "sin.approx.f32 \t$dst, $src;",
                     [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
def COSF : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
                     "cos.approx.f32 \t$dst, $src;",
                     [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;

//-----------------------------------
// Logical Arithmetic
//-----------------------------------

// Two-operand bitwise instruction family.
//   OpcStr - PTX mnemonic without the type suffix (e.g. "or").
//   OpNode - DAG node the instructions are selected from.
// One register-register (rr) and one register-immediate (ri) form is produced
// per width. The 8-bit forms are emitted with the ".b16" suffix.
multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
  // 1-bit (predicate)
  def b1rr : NVPTXInst<(outs Int1Regs:$dst),
                       (ins Int1Regs:$a, Int1Regs:$b),
                       !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
                       [(set Int1Regs:$dst,
                          (OpNode Int1Regs:$a, Int1Regs:$b))]>;
  def b1ri : NVPTXInst<(outs Int1Regs:$dst),
                       (ins Int1Regs:$a, i1imm:$b),
                       !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
                       [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;

  // 8-bit
  def b8rr : NVPTXInst<(outs Int8Regs:$dst),
                       (ins Int8Regs:$a, Int8Regs:$b),
                       !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
                       [(set Int8Regs:$dst,
                          (OpNode Int8Regs:$a, Int8Regs:$b))]>;
  def b8ri : NVPTXInst<(outs Int8Regs:$dst),
                       (ins Int8Regs:$a, i8imm:$b),
                       !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;

  // 16-bit
  def b16rr : NVPTXInst<(outs Int16Regs:$dst),
                        (ins Int16Regs:$a, Int16Regs:$b),
                        !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
                        [(set Int16Regs:$dst,
                           (OpNode Int16Regs:$a, Int16Regs:$b))]>;
  def b16ri : NVPTXInst<(outs Int16Regs:$dst),
                        (ins Int16Regs:$a, i16imm:$b),
                        !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
                        [(set Int16Regs:$dst,
                           (OpNode Int16Regs:$a, imm:$b))]>;

  // 32-bit
  def b32rr : NVPTXInst<(outs Int32Regs:$dst),
                        (ins Int32Regs:$a, Int32Regs:$b),
                        !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
                        [(set Int32Regs:$dst,
                           (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def b32ri : NVPTXInst<(outs Int32Regs:$dst),
                        (ins Int32Regs:$a, i32imm:$b),
                        !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
                        [(set Int32Regs:$dst,
                           (OpNode Int32Regs:$a, imm:$b))]>;

  // 64-bit
  def b64rr : NVPTXInst<(outs Int64Regs:$dst),
                        (ins Int64Regs:$a, Int64Regs:$b),
                        !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
                        [(set Int64Regs:$dst,
                           (OpNode Int64Regs:$a, Int64Regs:$b))]>;
  def b64ri : NVPTXInst<(outs Int64Regs:$dst),
                        (ins Int64Regs:$a, i64imm:$b),
                        !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
                        [(set Int64Regs:$dst,
                           (OpNode Int64Regs:$a, imm:$b))]>;
}

defm OR  : LOG_FORMAT<"or", or>;
defm AND : LOG_FORMAT<"and", and>;
defm XOR : LOG_FORMAT<"xor", xor>;

// Bitwise complement, one instruction per register width. The 8-bit form is
// emitted as "not.b16".
def NOT1 : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
                     "not.pred \t$dst, $src;",
                     [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
def NOT8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
                     "not.b16 \t$dst, $src;",
                     [(set Int8Regs:$dst, (not Int8Regs:$src))]>;
def NOT16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
                      "not.b16 \t$dst, $src;",
                      [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
def NOT32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
                      "not.b32 \t$dst, $src;",
                      [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
def NOT64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
                      "not.b64 \t$dst, $src;",
                      [(set Int64Regs:$dst, (not Int64Regs:$src))]>;

// Left-shift instruction family.
//   OpcStr - PTX mnemonic up to, but not including, the bit width
//            (e.g. "shl.b").
//   OpNode - DAG node the instructions are selected from.
// The shift amount (second source operand) is always a 32-bit value,
// whatever the width of the value being shifted. The 8-bit forms are
// emitted with the 16-bit instruction.
multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
  // 64-bit value
  def i64rr : NVPTXInst<(outs Int64Regs:$dst),
                        (ins Int64Regs:$a, Int32Regs:$b),
                        !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                        [(set Int64Regs:$dst,
                           (OpNode Int64Regs:$a, Int32Regs:$b))]>;
  def i64ri : NVPTXInst<(outs Int64Regs:$dst),
                        (ins Int64Regs:$a, i32imm:$b),
                        !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                        [(set Int64Regs:$dst,
                           (OpNode Int64Regs:$a, (i32 imm:$b)))]>;

  // 32-bit value
  def i32rr : NVPTXInst<(outs Int32Regs:$dst),
                        (ins Int32Regs:$a, Int32Regs:$b),
                        !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                        [(set Int32Regs:$dst,
                           (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def i32ri : NVPTXInst<(outs Int32Regs:$dst),
                        (ins Int32Regs:$a, i32imm:$b),
                        !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                        [(set Int32Regs:$dst,
                           (OpNode Int32Regs:$a, (i32 imm:$b)))]>;
  def i32ii : NVPTXInst<(outs Int32Regs:$dst),
                        (ins i32imm:$a, i32imm:$b),
                        !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                        [(set Int32Regs:$dst,
                           (OpNode (i32 imm:$a), (i32 imm:$b)))]>;

  // 16-bit value
  def i16rr : NVPTXInst<(outs Int16Regs:$dst),
                        (ins Int16Regs:$a, Int32Regs:$b),
                        !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                        [(set Int16Regs:$dst,
                           (OpNode Int16Regs:$a, Int32Regs:$b))]>;
  def i16ri : NVPTXInst<(outs Int16Regs:$dst),
                        (ins Int16Regs:$a, i32imm:$b),
                        !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                        [(set Int16Regs:$dst,
                           (OpNode Int16Regs:$a, (i32 imm:$b)))]>;

  // 8-bit value
  def i8rr : NVPTXInst<(outs Int8Regs:$dst),
                       (ins Int8Regs:$a, Int32Regs:$b),
                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                       [(set Int8Regs:$dst,
                          (OpNode Int8Regs:$a, Int32Regs:$b))]>;
  def i8ri : NVPTXInst<(outs Int8Regs:$dst),
                       (ins Int8Regs:$a, i32imm:$b),
                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                       [(set Int8Regs:$dst,
                          (OpNode Int8Regs:$a, (i32 imm:$b)))]>;
}

defm SHL : LSHIFT_FORMAT<"shl.b", shl>;

// For shifts, the second src operand must be a 32-bit value.
// Need to add cvt for the 8-bits.
// Right-shift instruction family.
//   OpcStr - PTX mnemonic up to, but not including, the bit width
//            (e.g. "shr.s").
//   OpNode - DAG node the instructions are selected from.
//   CVTStr - conversion instruction emitted ahead of the shift in the 8-bit
//            forms (e.g. "cvt.s16.s8"); it converts $a into $dst, and the
//            16-bit shift is then applied to $dst in place.
multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode, string CVTStr> {
  // 64-bit value
  def i64rr : NVPTXInst<(outs Int64Regs:$dst),
                        (ins Int64Regs:$a, Int32Regs:$b),
                        !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                        [(set Int64Regs:$dst,
                           (OpNode Int64Regs:$a, Int32Regs:$b))]>;
  def i64ri : NVPTXInst<(outs Int64Regs:$dst),
                        (ins Int64Regs:$a, i32imm:$b),
                        !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                        [(set Int64Regs:$dst,
                           (OpNode Int64Regs:$a, (i32 imm:$b)))]>;

  // 32-bit value
  def i32rr : NVPTXInst<(outs Int32Regs:$dst),
                        (ins Int32Regs:$a, Int32Regs:$b),
                        !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                        [(set Int32Regs:$dst,
                           (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def i32ri : NVPTXInst<(outs Int32Regs:$dst),
                        (ins Int32Regs:$a, i32imm:$b),
                        !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                        [(set Int32Regs:$dst,
                           (OpNode Int32Regs:$a, (i32 imm:$b)))]>;
  def i32ii : NVPTXInst<(outs Int32Regs:$dst),
                        (ins i32imm:$a, i32imm:$b),
                        !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                        [(set Int32Regs:$dst,
                           (OpNode (i32 imm:$a), (i32 imm:$b)))]>;

  // 16-bit value
  def i16rr : NVPTXInst<(outs Int16Regs:$dst),
                        (ins Int16Regs:$a, Int32Regs:$b),
                        !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                        [(set Int16Regs:$dst,
                           (OpNode Int16Regs:$a, Int32Regs:$b))]>;
  def i16ri : NVPTXInst<(outs Int16Regs:$dst),
                        (ins Int16Regs:$a, i32imm:$b),
                        !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                        [(set Int16Regs:$dst,
                           (OpNode Int16Regs:$a, (i32 imm:$b)))]>;

  // 8-bit value: "<CVTStr> $dst, $a" followed by a 16-bit shift of $dst.
  def i8rr : NVPTXInst<(outs Int8Regs:$dst),
                       (ins Int8Regs:$a, Int32Regs:$b),
                       !strconcat(!strconcat(CVTStr, " \t$dst, $a;\n\t"),
                                  !strconcat(OpcStr, "16 \t$dst, $dst, $b;")),
                       [(set Int8Regs:$dst,
                          (OpNode Int8Regs:$a, Int32Regs:$b))]>;
  def i8ri : NVPTXInst<(outs Int8Regs:$dst),
                       (ins Int8Regs:$a, i32imm:$b),
                       !strconcat(!strconcat(CVTStr, " \t$dst, $a;\n\t"),
                                  !strconcat(OpcStr, "16 \t$dst, $dst, $b;")),
                       [(set Int8Regs:$dst,
                          (OpNode Int8Regs:$a, (i32 imm:$b)))]>;
}

defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">;
defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">;

//
// Software rotates, 32-bit.
//
// Each rotate is emitted as a braced PTX block that shifts the source left
// and right into two scratch registers and adds the halves together.
//

// Rotate by immediate: $amt1 is the left-shift amount and $amt2 the
// right-shift amount. There is no selection pattern on the instruction
// itself; the Pat records below supply both amounts.
def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
    (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
    !strconcat("{{\n\t.reg .b32 %lhs;\n\t.reg .b32 %rhs;\n\t",
    !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t",
    !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t",
               "add.u32 \t$dst, %lhs, %rhs;\n\t}}"))),
    []>;

// Maps an immediate N to the i32 target constant (32 - N).
def SUB_FRM_32 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(32 - N->getZExtValue(), MVT::i32);
}]>;

// rotl: shift left by amt, right by 32-amt.
def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;
// rotr: shift left by 32-amt, right by amt.
def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;

// Rotate left by a register amount; the complementary amount (32 - $amt) is
// computed into the scratch register %amt2.
def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst),
    (ins Int32Regs:$src, Int32Regs:$amt),
    !strconcat("{{\n\t.reg .b32 %lhs;\n\t",
    !strconcat(".reg .b32 %rhs;\n\t.reg .b32 %amt2;\n\t",
    !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
    !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
    !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
               "add.u32 \t$dst, %lhs, %rhs;\n\t}}"))))),
    [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>;

// Rotate right by a register amount; mirror image of ROTL32reg_sw.
def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst),
    (ins Int32Regs:$src, Int32Regs:$amt),
    !strconcat("{{\n\t.reg .b32 %lhs;\n\t",
    !strconcat(".reg .b32 %rhs;\n\t.reg .b32 %amt2;\n\t",
    !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
    !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
    !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
               "add.u32 \t$dst, %lhs, %rhs;\n\t}}"))))),
    [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>;

//
// Software rotates, 64-bit. Same scheme as the 32-bit versions; the shift
// amounts remain 32-bit values.
//

// Rotate by immediate: $amt1 is the left-shift amount and $amt2 the
// right-shift amount, both supplied by the Pat records below.
def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst),
    (ins Int64Regs:$src, i32imm:$amt1, i32imm:$amt2),
    !strconcat("{{\n\t.reg .b64 %lhs;\n\t.reg .b64 %rhs;\n\t",
    !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t",
    !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t",
               "add.u64 \t$dst, %lhs, %rhs;\n\t}}"))),
    []>;

// Maps an immediate N to the i32 target constant (64 - N).
def SUB_FRM_64 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(64 - N->getZExtValue(), MVT::i32);
}]>;

// rotl: shift left by amt, right by 64-amt.
def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
// rotr: shift left by 64-amt, right by amt.
def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;

// Rotate left by a register amount; the complementary amount (64 - $amt) is
// computed into the 32-bit scratch register %amt2.
def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst),
    (ins Int64Regs:$src, Int32Regs:$amt),
    !strconcat("{{\n\t.reg .b64 %lhs;\n\t",
    !strconcat(".reg .b64 %rhs;\n\t.reg .u32 %amt2;\n\t",
    !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t",
    !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
    !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t",
               "add.u64 \t$dst, %lhs, %rhs;\n\t}}"))))),
    [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;

// Rotate right by a register amount; mirror image of ROTL64reg_sw.
def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst),
    (ins Int64Regs:$src, Int32Regs:$amt),
    !strconcat("{{\n\t.reg .b64 %lhs;\n\t",
    !strconcat(".reg .b64 %rhs;\n\t.reg .u32 %amt2;\n\t",
    !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t",
    !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
    !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
               "add.u64 \t$dst, %lhs, %rhs;\n\t}}"))))),
    [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;

//-----------------------------------
// Data Movement (Load / Store, Move)
//-----------------------------------

// Address selection: register + immediate, matched by the C++ selector
// routines named in the pattern (SelectADDRri / SelectADDRri64).
def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
                            [SDNPWantRoot]>;
def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
                              [SDNPWantRoot]>;

// Memory operands made of a base register and an immediate, printed by
// printMemOperand.
def MEMri : Operand<i32> {
  let PrintMethod = "printMemOperand";
  let MIOperandInfo = (ops Int32Regs, i32imm);
}
def MEMri64 : Operand<i64> {
  let PrintMethod = "printMemOperand";
  let MIOperandInfo = (ops Int64Regs, i64imm);
}

def imem : Operand<iPTR> {
  let PrintMethod = "printOperand";
}

def imemAny : Operand<iPTRAny> {
  let PrintMethod = "printOperand";
}

def LdStCode : Operand<i32> {
  let PrintMethod = "printLdStCode";
}

// NVPTXISD::Wrapper: one pointer-typed result with the same type as its
// single operand.
def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper    : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;

// Materialize the address of a wrapped global into a register.
def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
                     "mov.u32 \t$dst, $a;",
                     [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;

def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
                     "mov.u64 \t$dst, $a;",
                     [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;

// Register-to-register moves. These carry no selection pattern.
// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
let IsSimpleMove=1 in {
def IMOV1rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
                   "mov.pred \t$dst, $sss;", []>;
def IMOV8rr: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss),
                    "mov.u16 \t$dst, $sss;", []>;
def IMOV16rr: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
                    "mov.u16 \t$dst, $sss;", []>;
def IMOV32rr: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
                    "mov.u32 \t$dst, $sss;", []>;
def IMOV64rr: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
                    "mov.u64 \t$dst, $sss;", []>;

def FMOV32rr: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
                    "mov.f32 \t$dst, $src;", []>;
def FMOV64rr: NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
                    "mov.f64 \t$dst, $src;", []>;
}

// Immediate-to-register moves, selected from integer / FP constants.
def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
                    "mov.pred \t$dst, $src;",
          [(set Int1Regs:$dst, imm:$src)]>;
def IMOV8ri: NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src),
                    "mov.u16 \t$dst, $src;",
          [(set Int8Regs:$dst, imm:$src)]>;
def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
                    "mov.u16 \t$dst, $src;",
          [(set Int16Regs:$dst, imm:$src)]>;
def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
                    "mov.u32 \t$dst, $src;",
          [(set Int32Regs:$dst, imm:$src)]>;
def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
                    "mov.u64 \t$dst, $src;",
          [(set Int64Regs:$dst, imm:$src)]>;

def FMOV32ri: NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
                    "mov.f32 \t$dst, $src;",
          [(set Float32Regs:$dst, fpimm:$src)]>;
def FMOV64ri: NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
                    "mov.f64 \t$dst, $src;",
          [(set Float64Regs:$dst, fpimm:$src)]>;

// A wrapped external symbol used as an i32 is materialized with IMOV32ri.
def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;

//---- Copy Frame Index ----
def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
                        "add.u32 \t$dst, ${addr:add};",
                        [(set Int32Regs:$dst, ADDRri:$addr)]>;
def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
                        "add.u64 \t$dst, ${addr:add};",
                        [(set Int64Regs:$dst, ADDRri64:$addr)]>;

//-----------------------------------
// Comparison and Selection
//-----------------------------------

// Generate string block like
// {
//   .reg .pred p;
//   setp.gt.s16 p, %a, %b;
//   selp.s16 %dst, -1, 0, p;
// }
// when OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b
//
// The finished string is available as field 's'; t1..t11 are the
// intermediate concatenation steps.
class Set_Str<string OpcStr, string sz1, string sz2, string d, string a,
              string b> {
  string t1  = "{{\n\t.reg .pred p;\n\t";
  string t2  = !strconcat(t1 , OpcStr);
  string t3  = !strconcat(t2 , sz1);
  string t4  = !strconcat(t3 , " \tp, ");
  string t5  = !strconcat(t4 , a);
  string t6  = !strconcat(t5 , ", ");
  string t7  = !strconcat(t6 , b);
  string t8  = !strconcat(t7 , ";\n\tselp.s");
  string t9  = !strconcat(t8 , sz2);
  string t10 = !strconcat(t9, " \t");
  string t11 = !strconcat(t10, d);
  string s   = !strconcat(t11, ", -1, 0, p;\n\t}}");
}

// Generate string block like
// {
//   .reg .pred p;
//   .reg .s16 %temp1;
//   .reg .s16 %temp2;
//   cvt.s16.s8 %temp1, %a;
//   cvt.s16.s8 %temp2, %b;
//   setp.gt.s16 p, %temp1, %temp2;
//   selp.s16 %dst, -1, 0, p;
// }
// when OpcStr=setp.gt.s d=%dst a=%a b=%b type=s16 cvt=cvt.s16.s8
//
// Both 8-bit inputs are first converted into the scratch registers %temp1
// and %temp2; the compare is always issued at 16 bits. The finished string
// is available as field 's'.
class Set_Stri8<string OpcStr, string d, string a, string b, string type,
                string cvt> {
  string t1  = "{{\n\t.reg .pred p;\n\t";
  string t2  = !strconcat(t1, ".reg .");
  string t3  = !strconcat(t2, type);
  string t4  = !strconcat(t3, " %temp1;\n\t");
  string t5  = !strconcat(t4, ".reg .");
  string t6  = !strconcat(t5, type);
  string t7  = !strconcat(t6, " %temp2;\n\t");
  string t8  = !strconcat(t7, cvt);
  string t9  = !strconcat(t8, " \t%temp1, ");
  string t10 = !strconcat(t9, a);
  string t11 = !strconcat(t10, ";\n\t");
  string t12 = !strconcat(t11, cvt);
  string t13 = !strconcat(t12, " \t%temp2, ");
  string t14 = !strconcat(t13, b);
  string t15 = !strconcat(t14, ";\n\t");
  string t16 = !strconcat(t15, OpcStr);
  string t17 = !strconcat(t16, "16");
  string t18 = !strconcat(t17, " \tp, %temp1, %temp2;\n\t");
  string t19 = !strconcat(t18, "selp.s16 \t");
  string t20 = !strconcat(t19, d);
  string s   = !strconcat(t20, ", -1, 0, p;\n\t}}");
}

// Integer comparison family.
//   OpcStr     - predicate-producing mnemonic without the width
//                (e.g. "setp.gt.s").
//   OpcStr_u32 - mnemonic producing a 32-bit result, without the width
//                (e.g. "set.gt.u32.s").
//   OpNode     - setcc fragment the instructions are selected from.
//   TypeStr    - scratch register type used by the 8-bit forms (e.g. "s16").
//   CVTStr     - conversion used by the 8-bit forms (e.g. "cvt.s16.s8").
// Three groups are produced:
//   *_toiN - same-width integer result built with Set_Str / Set_Stri8;
//            these have no selection pattern.
//   *_p    - Int1Regs (predicate) result.
//   *_u32  - Int32Regs result.
multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
                       string TypeStr, string CVTStr> {
  def i8rr_toi8: NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
     Set_Stri8<OpcStr, "$dst", "$a", "$b", TypeStr, CVTStr>.s,
               []>;
  def i16rr_toi16: NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
      Int16Regs:$b),
     Set_Str<OpcStr, "16", "16", "$dst", "$a", "$b">.s,
               []>;
  def i32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
      Int32Regs:$b),
     Set_Str<OpcStr, "32", "32", "$dst", "$a", "$b">.s,
               []>;
  def i64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
      Int64Regs:$b),
     Set_Str<OpcStr, "64", "64", "$dst", "$a", "$b">.s,
               []>;

  // Predicate results.
  def i8rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
     Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
             [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
  def i8ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
     Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
               [(set Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
  def i8ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
     Handle_i8ir<OpcStr, TypeStr, CVTStr>.s,
               [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
  def i16rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
                 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
             [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
  def i16ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
                 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
               [(set Int1Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
  def i16ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
                 !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
               [(set Int1Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
  def i32rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
                 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
             [(set Int1Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def i32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
               [(set Int1Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
  def i32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
                 !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
               [(set Int1Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
  def i64rr_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
                 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
             [(set Int1Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
  def i64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
                 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
               [(set Int1Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
  def i64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
                 !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
               [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;

  // 32-bit integer results.
  def i8rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
     Handle_i8rr<OpcStr_u32, TypeStr, CVTStr>.s,
             [(set Int32Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
  def i8ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
     Handle_i8ri<OpcStr_u32, TypeStr, CVTStr>.s,
               [(set Int32Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
  def i8ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
     Handle_i8ir<OpcStr_u32, TypeStr, CVTStr>.s,
               [(set Int32Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
  def i16rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a,
      Int16Regs:$b),
                 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
             [(set Int32Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
  def i16ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
                 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
               [(set Int32Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
  def i16ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
                 !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
               [(set Int32Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
  def i32rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
      Int32Regs:$b),
                 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
             [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def i32ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
               [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
  def i32ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
                 !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
               [(set Int32Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
  def i64rr_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a,
      Int64Regs:$b),
                 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
             [(set Int32Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
  def i64ri_u32: NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
                 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
               [(set Int32Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
  def i64ir_u32: NVPTXInst<(outs Int32Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
                 !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
               [(set Int32Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
}

// Floating-point comparison family.
//   OpcStr     - predicate-producing mnemonic ending in '.' (e.g. "setp.gt.").
//   OpcStr_u32 - mnemonic producing a 32-bit result, ending in '.'
//                (e.g. "set.gt.u32.").
//   OpNode     - setcc fragment the instructions are selected from.
// Three groups are produced:
//   *_toiN - integer result built with Set_Str; no selection pattern.
//   *_p    - Int1Regs (predicate) result.
//   *_u32  - Int32Regs result.
// Every f32 form has a "_ftz" twin that emits the ".ftz" modifier. The twin
// has exactly the same selection pattern as the plain form, so each "_ftz"
// def must be gated on doF32FTZ; otherwise the flush-to-zero encoding could
// be chosen even when FTZ has not been requested.
multiclass FSET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode> {
  def f32rr_toi32_ftz: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
      Float32Regs:$b),
     Set_Str<OpcStr, "ftz.f32", "32", "$dst", "$a", "$b">.s,
               []>, Requires<[doF32FTZ]>;
  def f32rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a,
      Float32Regs:$b),
     Set_Str<OpcStr, "f32", "32", "$dst", "$a", "$b">.s,
               []>;
  def f64rr_toi64: NVPTXInst<(outs Int64Regs:$dst), (ins Float64Regs:$a,
      Float64Regs:$b),
     Set_Str<OpcStr, "f64", "64", "$dst", "$a", "$b">.s,
               []>;
  def f64rr_toi32: NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a,
      Float64Regs:$b),
     Set_Str<OpcStr, "f64", "32", "$dst", "$a", "$b">.s,
               []>;

  // Predicate results.
  def f32rr_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
      (ins Float32Regs:$a, Float32Regs:$b),
                 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
             [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
      Requires<[doF32FTZ]>;
  def f32rr_p: NVPTXInst<(outs Int1Regs:$dst),
      (ins Float32Regs:$a, Float32Regs:$b),
                 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
             [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
  def f32ri_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
      (ins Float32Regs:$a, f32imm:$b),
                 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
               [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
      Requires<[doF32FTZ]>;
  def f32ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
                 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
               [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
  def f32ir_p_ftz: NVPTXInst<(outs Int1Regs:$dst),
      (ins f32imm:$a, Float32Regs:$b),
                 !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
               [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>,
      Requires<[doF32FTZ]>;
  def f32ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
                 !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
               [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
  def f64rr_p: NVPTXInst<(outs Int1Regs:$dst),
      (ins Float64Regs:$a, Float64Regs:$b),
                 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
             [(set Int1Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
  def f64ri_p: NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, f64imm:$b),
                 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
               [(set Int1Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
  def f64ir_p: NVPTXInst<(outs Int1Regs:$dst), (ins f64imm:$a, Float64Regs:$b),
                 !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
               [(set Int1Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;

  // 32-bit integer results. The _ftz forms are predicated on doF32FTZ so
  // that they mirror the _p_ftz forms above.
  def f32rr_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
      (ins Float32Regs:$a, Float32Regs:$b),
                 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
             [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
      Requires<[doF32FTZ]>;
  def f32rr_u32: NVPTXInst<(outs Int32Regs:$dst),
      (ins Float32Regs:$a, Float32Regs:$b),
                 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
             [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
  def f32ri_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
      (ins Float32Regs:$a, f32imm:$b),
                 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
               [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
      Requires<[doF32FTZ]>;
  def f32ri_u32: NVPTXInst<(outs Int32Regs:$dst),
      (ins Float32Regs:$a, f32imm:$b),
                 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
               [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
  def f32ir_u32_ftz: NVPTXInst<(outs Int32Regs:$dst),
      (ins f32imm:$a, Float32Regs:$b),
                 !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
               [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>,
      Requires<[doF32FTZ]>;
  def f32ir_u32: NVPTXInst<(outs Int32Regs:$dst),
      (ins f32imm:$a, Float32Regs:$b),
                 !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
               [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
  def f64rr_u32: NVPTXInst<(outs Int32Regs:$dst),
      (ins Float64Regs:$a, Float64Regs:$b),
                 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
             [(set Int32Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
  def f64ri_u32: NVPTXInst<(outs Int32Regs:$dst),
      (ins Float64Regs:$a, f64imm:$b),
                 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
               [(set Int32Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
  def f64ir_u32: NVPTXInst<(outs Int32Regs:$dst),
      (ins f64imm:$a, Float64Regs:$b),
                 !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
               [(set Int32Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
}

// Integer comparisons: signed forms use the ".s" opcodes with cvt.s16.s8 for
// 8-bit inputs, unsigned forms use the ".u" opcodes with cvt.u16.u8.
defm ISetSGT
: ISET_FORMAT<"setp.gt.s", "set.gt.u32.s", setgt, "s16", "cvt.s16.s8">;
defm ISetUGT
: ISET_FORMAT<"setp.gt.u", "set.gt.u32.u", setugt, "u16", "cvt.u16.u8">;
defm ISetSLT
: ISET_FORMAT<"setp.lt.s", "set.lt.u32.s", setlt, "s16", "cvt.s16.s8">;
defm ISetULT
: ISET_FORMAT<"setp.lt.u", "set.lt.u32.u", setult, "u16", "cvt.u16.u8">;
defm ISetSGE
: ISET_FORMAT<"setp.ge.s", "set.ge.u32.s", setge, "s16", "cvt.s16.s8">;
defm ISetUGE
: ISET_FORMAT<"setp.ge.u", "set.ge.u32.u", setuge, "u16", "cvt.u16.u8">;
defm ISetSLE
: ISET_FORMAT<"setp.le.s", "set.le.u32.s", setle, "s16", "cvt.s16.s8">;
defm ISetULE
: ISET_FORMAT<"setp.le.u", "set.le.u32.u", setule, "u16", "cvt.u16.u8">;
defm ISetSEQ
: ISET_FORMAT<"setp.eq.s", "set.eq.u32.s", seteq, "s16", "cvt.s16.s8">;
defm ISetUEQ
: ISET_FORMAT<"setp.eq.u", "set.eq.u32.u", setueq, "u16", "cvt.u16.u8">;
defm ISetSNE
: ISET_FORMAT<"setp.ne.s", "set.ne.u32.s", setne, "s16", "cvt.s16.s8">;
defm ISetUNE
: ISET_FORMAT<"setp.ne.u", "set.ne.u32.u", setune, "u16", "cvt.u16.u8">;

// Compare 2 i1's and produce a predicate.
// "not equal" is a plain xor; "equal" is xor followed by not.
def ISetSNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
  (ins Int1Regs:$a, Int1Regs:$b),
                      "xor.pred \t$dst, $a, $b;",
            [(set Int1Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
def ISetUNEi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
  (ins Int1Regs:$a, Int1Regs:$b),
                      "xor.pred \t$dst, $a, $b;",
            [(set Int1Regs:$dst, (setune Int1Regs:$a, Int1Regs:$b))]>;
def ISetSEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
  (ins Int1Regs:$a, Int1Regs:$b),
            !strconcat("{{\n\t",
            !strconcat(".reg .pred temp;\n\t",
            !strconcat("xor.pred \ttemp, $a, $b;\n\t",
            !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
            [(set Int1Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
def ISetUEQi1rr_p : NVPTXInst<(outs Int1Regs:$dst),
  (ins Int1Regs:$a, Int1Regs:$b),
            !strconcat("{{\n\t",
            !strconcat(".reg .pred temp;\n\t",
            !strconcat("xor.pred \ttemp, $a, $b;\n\t",
            !strconcat("not.pred \t$dst, temp;\n\t}}","")))),
            [(set Int1Regs:$dst, (setueq Int1Regs:$a, Int1Regs:$b))]>;

// Compare 2 i1's and produce a u32
// temp = a xor b; selp then picks -1/0 (ne) or 0/-1 (eq) on temp.
def ISETSNEi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
  (ins Int1Regs:$a, Int1Regs:$b),
  !strconcat("{{\n\t",
  !strconcat(".reg .pred temp;\n\t",
  !strconcat("xor.pred \ttemp, $a, $b;\n\t",
  !strconcat("selp.u32 \t$dst, -1, 0, temp;", "\n\t}}")))),
  [(set Int32Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
def ISETSEQi1rr_u32 : NVPTXInst<(outs Int32Regs:$dst),
  (ins Int1Regs:$a, Int1Regs:$b),
  !strconcat("{{\n\t",
  !strconcat(".reg .pred temp;\n\t",
  !strconcat("xor.pred \ttemp, $a, $b;\n\t",
  !strconcat("selp.u32 \t$dst, 0, -1, temp;", "\n\t}}")))),
  [(set Int32Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;

// Ordered floating-point comparisons.
defm FSetGT : FSET_FORMAT<"setp.gt.", "set.gt.u32.", setogt>;
defm FSetLT : FSET_FORMAT<"setp.lt.", "set.lt.u32.", setolt>;
defm FSetGE : FSET_FORMAT<"setp.ge.", "set.ge.u32.", setoge>;
defm FSetLE : FSET_FORMAT<"setp.le.", "set.le.u32.", setole>;
defm FSetEQ : FSET_FORMAT<"setp.eq.", "set.eq.u32.", setoeq>;
defm FSetNE : FSET_FORMAT<"setp.ne.", "set.ne.u32.", setone>;

// Unordered floating-point comparisons.
defm FSetUGT : FSET_FORMAT<"setp.gtu.", "set.gtu.u32.", setugt>;
defm FSetULT : FSET_FORMAT<"setp.ltu.", "set.ltu.u32.",setult>;
defm FSetUGE : FSET_FORMAT<"setp.geu.", "set.geu.u32.",setuge>;
defm FSetULE : FSET_FORMAT<"setp.leu.", "set.leu.u32.",setule>;
defm FSetUEQ : FSET_FORMAT<"setp.equ.", "set.equ.u32.",setueq>;
defm FSetUNE : FSET_FORMAT<"setp.neu.", "set.neu.u32.",setune>;

// seto / setuo tests.
defm FSetNUM : FSET_FORMAT<"setp.num.", "set.num.u32.",seto>;
defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.",setuo>;

// i1 select expanded into predicate logic: (p & a) | (!p & b).
def SELECTi1rr : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
                     (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
                             (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
//===----------------------------------------------------------------------===//
// Select (selp) instructions.
//
// Each def takes its operands in the order ($a, $b, $p) while the matched
// `select` node carries the predicate first: (select $p, $a, $b).
// The Int8Regs forms below print "selp.b16", the same mnemonic as the
// Int16Regs forms.
// NOTE(review): presumably i8 values are held in 16-bit registers -- confirm
// against the register class definitions.
//===----------------------------------------------------------------------===//

def SELECTi8rr
  : NVPTXInst<(outs Int8Regs:$dst),
              (ins Int8Regs:$a, Int8Regs:$b, Int1Regs:$p),
              "selp.b16 \t$dst, $a, $b, $p;",
              [(set Int8Regs:$dst,
                    (select Int1Regs:$p, Int8Regs:$a, Int8Regs:$b))]>;
def SELECTi8ri
  : NVPTXInst<(outs Int8Regs:$dst),
              (ins Int8Regs:$a, i8imm:$b, Int1Regs:$p),
              "selp.b16 \t$dst, $a, $b, $p;",
              [(set Int8Regs:$dst,
                    (select Int1Regs:$p, Int8Regs:$a, imm:$b))]>;
def SELECTi8ir
  : NVPTXInst<(outs Int8Regs:$dst),
              (ins i8imm:$a, Int8Regs:$b, Int1Regs:$p),
              "selp.b16 \t$dst, $a, $b, $p;",
              [(set Int8Regs:$dst,
                    (select Int1Regs:$p, imm:$a, Int8Regs:$b))]>;
def SELECTi8ii
  : NVPTXInst<(outs Int8Regs:$dst),
              (ins i8imm:$a, i8imm:$b, Int1Regs:$p),
              "selp.b16 \t$dst, $a, $b, $p;",
              [(set Int8Regs:$dst,
                    (select Int1Regs:$p, imm:$a, imm:$b))]>;

def SELECTi16rr
  : NVPTXInst<(outs Int16Regs:$dst),
              (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p),
              "selp.b16 \t$dst, $a, $b, $p;",
              [(set Int16Regs:$dst,
                    (select Int1Regs:$p, Int16Regs:$a, Int16Regs:$b))]>;
def SELECTi16ri
  : NVPTXInst<(outs Int16Regs:$dst),
              (ins Int16Regs:$a, i16imm:$b, Int1Regs:$p),
              "selp.b16 \t$dst, $a, $b, $p;",
              [(set Int16Regs:$dst,
                    (select Int1Regs:$p, Int16Regs:$a, imm:$b))]>;
def SELECTi16ir
  : NVPTXInst<(outs Int16Regs:$dst),
              (ins i16imm:$a, Int16Regs:$b, Int1Regs:$p),
              "selp.b16 \t$dst, $a, $b, $p;",
              [(set Int16Regs:$dst,
                    (select Int1Regs:$p, imm:$a, Int16Regs:$b))]>;
def SELECTi16ii
  : NVPTXInst<(outs Int16Regs:$dst),
              (ins i16imm:$a, i16imm:$b, Int1Regs:$p),
              "selp.b16 \t$dst, $a, $b, $p;",
              [(set Int16Regs:$dst,
                    (select Int1Regs:$p, imm:$a, imm:$b))]>;

def SELECTi32rr
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p),
              "selp.b32 \t$dst, $a, $b, $p;",
              [(set Int32Regs:$dst,
                    (select Int1Regs:$p, Int32Regs:$a, Int32Regs:$b))]>;
def SELECTi32ri
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, i32imm:$b, Int1Regs:$p),
              "selp.b32 \t$dst, $a, $b, $p;",
              [(set Int32Regs:$dst,
                    (select Int1Regs:$p, Int32Regs:$a, imm:$b))]>;
def SELECTi32ir
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins i32imm:$a, Int32Regs:$b, Int1Regs:$p),
              "selp.b32 \t$dst, $a, $b, $p;",
              [(set Int32Regs:$dst,
                    (select Int1Regs:$p, imm:$a, Int32Regs:$b))]>;
def SELECTi32ii
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins i32imm:$a, i32imm:$b, Int1Regs:$p),
              "selp.b32 \t$dst, $a, $b, $p;",
              [(set Int32Regs:$dst,
                    (select Int1Regs:$p, imm:$a, imm:$b))]>;

def SELECTi64rr
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$a, Int64Regs:$b, Int1Regs:$p),
              "selp.b64 \t$dst, $a, $b, $p;",
              [(set Int64Regs:$dst,
                    (select Int1Regs:$p, Int64Regs:$a, Int64Regs:$b))]>;
def SELECTi64ri
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$a, i64imm:$b, Int1Regs:$p),
              "selp.b64 \t$dst, $a, $b, $p;",
              [(set Int64Regs:$dst,
                    (select Int1Regs:$p, Int64Regs:$a, imm:$b))]>;
def SELECTi64ir
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins i64imm:$a, Int64Regs:$b, Int1Regs:$p),
              "selp.b64 \t$dst, $a, $b, $p;",
              [(set Int64Regs:$dst,
                    (select Int1Regs:$p, imm:$a, Int64Regs:$b))]>;
def SELECTi64ii
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins i64imm:$a, i64imm:$b, Int1Regs:$p),
              "selp.b64 \t$dst, $a, $b, $p;",
              [(set Int64Regs:$dst,
                    (select Int1Regs:$p, imm:$a, imm:$b))]>;

// Floating-point selects match `fpimm` rather than `imm` for constants.
def SELECTf32rr
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b, Int1Regs:$p),
              "selp.f32 \t$dst, $a, $b, $p;",
              [(set Float32Regs:$dst,
                    (select Int1Regs:$p, Float32Regs:$a, Float32Regs:$b))]>;
def SELECTf32ri
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b, Int1Regs:$p),
              "selp.f32 \t$dst, $a, $b, $p;",
              [(set Float32Regs:$dst,
                    (select Int1Regs:$p, Float32Regs:$a, fpimm:$b))]>;
def SELECTf32ir
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins f32imm:$a, Float32Regs:$b, Int1Regs:$p),
              "selp.f32 \t$dst, $a, $b, $p;",
              [(set Float32Regs:$dst,
                    (select Int1Regs:$p, fpimm:$a, Float32Regs:$b))]>;
def SELECTf32ii
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins f32imm:$a, f32imm:$b, Int1Regs:$p),
              "selp.f32 \t$dst, $a, $b, $p;",
              [(set Float32Regs:$dst,
                    (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;

def SELECTf64rr
  : NVPTXInst<(outs Float64Regs:$dst),
              (ins Float64Regs:$a, Float64Regs:$b, Int1Regs:$p),
              "selp.f64 \t$dst, $a, $b, $p;",
              [(set Float64Regs:$dst,
                    (select Int1Regs:$p, Float64Regs:$a, Float64Regs:$b))]>;
def SELECTf64ri
  : NVPTXInst<(outs Float64Regs:$dst),
              (ins Float64Regs:$a, f64imm:$b, Int1Regs:$p),
              "selp.f64 \t$dst, $a, $b, $p;",
              [(set Float64Regs:$dst,
                    (select Int1Regs:$p, Float64Regs:$a, fpimm:$b))]>;
def SELECTf64ir
  : NVPTXInst<(outs Float64Regs:$dst),
              (ins f64imm:$a, Float64Regs:$b, Int1Regs:$p),
              "selp.f64 \t$dst, $a, $b, $p;",
              [(set Float64Regs:$dst,
                    (select Int1Regs:$p, fpimm:$a, Float64Regs:$b))]>;
def SELECTf64ii
  : NVPTXInst<(outs Float64Regs:$dst),
              (ins f64imm:$a, f64imm:$b, Int1Regs:$p),
              "selp.f64 \t $dst, $a, $b, $p;",
              [(set Float64Regs:$dst,
                    (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;

//def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
//                      [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;

//===----------------------------------------------------------------------===//
// Type profiles for the call / parameter / return-value SDNodes.
// SDTypeProfile<results, operands, constraints>.
//===----------------------------------------------------------------------===//

def SDTDeclareParamProfile
  : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
def SDTDeclareScalarParamProfile
  : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
def SDTLoadParamProfile
  : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDTPrintCallProfile
  : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDTPrintCallUniProfile
  : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDTStoreParamProfile
  : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParam32Profile
  : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTCallArgProfile
  : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTCallArgMarkProfile
  : SDTypeProfile<0, 0, []>;
def SDTCallVoidProfile
  : SDTypeProfile<0, 1, []>;
def SDTCallValProfile
  : SDTypeProfile<1, 0, []>;
def SDTMoveParamProfile
  : SDTypeProfile<1, 1, []>;
def SDTMoveRetvalProfile
  : SDTypeProfile<0, 1, []>;
def SDTStoreRetvalProfile
  : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTPseudoUseParamProfile
  : SDTypeProfile<0, 1, []>;

//===----------------------------------------------------------------------===//
// Target SDNodes used while lowering calls, parameters and return values.
// Most of them are chained, glued on both sides and marked as having side
// effects; the exceptions are LoadParam (may-load instead of side effect),
// MoveParam (no properties) and the *Retval / RETURN nodes (no glue).
//===----------------------------------------------------------------------===//

def DeclareParam
  : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareScalarParam
  : SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRetParam
  : SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRet
  : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def LoadParam
  : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
           [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def PrintCall
  : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintCallUni
  : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParam
  : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamU32
  : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamS32
  : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def MoveToParam
  : SDNode<"NVPTXISD::MoveToParam", SDTStoreParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArgBegin
  : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArg
  : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def LastCallArg
  : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArgEnd
  : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVoid
  : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def Prototype
  : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVal
  : SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def MoveParam
  : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile,
           []>;
def MoveRetval
  : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile,
           [SDNPHasChain, SDNPSideEffect]>;
def StoreRetval
  : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
           [SDNPHasChain, SDNPSideEffect]>;
def MoveToRetval
  : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile,
           [SDNPHasChain, SDNPSideEffect]>;
def PseudoUseParam
  : SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def RETURNNode
  : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
           [SDNPHasChain, SDNPSideEffect]>;

//===----------------------------------------------------------------------===//
// Instruction templates for parameter / return-value traffic.
// `opstr` is the type suffix appended to the mnemonic (e.g. ".b32").
//===----------------------------------------------------------------------===//

// (LoadParam 1, $b): prints "ld.param<opstr> $dst, [retval0+$b];".
class LoadParamMemInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst),
              (ins i32imm:$b),
              !strconcat("ld.param",
                         !strconcat(opstr, "\t$dst, [retval0+$b];")),
              [(set regclass:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;

// (LoadParam 0, $b): prints "mov<opstr> $dst, retval$b;".
class LoadParamRegInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst),
              (ins i32imm:$b),
              !strconcat("mov",
                         !strconcat(opstr, "\t$dst, retval$b;")),
              [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;

// (StoreParam $a, $b, $val): prints "st.param<opstr> [param$a+$b], $val;".
class StoreParamInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$val, i32imm:$a, i32imm:$b),
              !strconcat("st.param",
                         !strconcat(opstr, "\t[param$a+$b], $val;")),
              [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;

// (MoveToParam $a, $b, $val): prints "mov<opstr> param$a, $val;".
// $b is part of the pattern but does not appear in the printed text.
class MoveToParamInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$val, i32imm:$a, i32imm:$b),
              !strconcat("mov",
                         !strconcat(opstr, "\tparam$a, $val;")),
              [(MoveToParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;

// (StoreRetval $a, $val): prints "st.param<opstr> [func_retval0+$a], $val;".
class StoreRetvalInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$val, i32imm:$a),
              !strconcat("st.param",
                         !strconcat(opstr, "\t[func_retval0+$a], $val;")),
              [(StoreRetval (i32 imm:$a), regclass:$val)]>;

// (MoveToRetval $num, $val): prints "mov<opstr> func_retval$num, $val;".
class MoveToRetvalInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins i32imm:$num, regclass:$val),
              !strconcat("mov",
                         !strconcat(opstr, "\tfunc_retval$num, $val;")),
              [(MoveToRetval (i32 imm:$num), regclass:$val)]>;

// (MoveRetval $val): prints "mov<opstr> func_retval0, $val;".
class MoveRetvalInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs),
              (ins regclass:$val),
              !strconcat("mov",
                         !strconcat(opstr, "\tfunc_retval0, $val;")),
              [(MoveRetval regclass:$val)]>;

//===----------------------------------------------------------------------===//
// Call prefix printers. The i32 operand of PrintCall / PrintCallUni selects
// how many retvalN names are listed (0 through 8).
//===----------------------------------------------------------------------===//

def PrintCallRetInst1
  : NVPTXInst<(outs), (ins),
              "call (retval0), ",
              [(PrintCall (i32 1))]>;
def PrintCallRetInst2
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1), ",
              [(PrintCall (i32 2))]>;
def PrintCallRetInst3
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2), ",
              [(PrintCall (i32 3))]>;
def PrintCallRetInst4
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2, retval3), ",
              [(PrintCall (i32 4))]>;
def PrintCallRetInst5
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2, retval3, retval4), ",
              [(PrintCall (i32 5))]>;
def PrintCallRetInst6
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2, retval3, retval4, retval5), ",
              [(PrintCall (i32 6))]>;
def PrintCallRetInst7
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
              [(PrintCall (i32 7))]>;
def PrintCallRetInst8
  : NVPTXInst<(outs), (ins),
              !strconcat("call (retval0, retval1, retval2, retval3, retval4",
                         ", retval5, retval6, retval7), "),
              [(PrintCall (i32 8))]>;

def PrintCallNoRetInst
  : NVPTXInst<(outs), (ins),
              "call ",
              [(PrintCall (i32 0))]>;

def PrintCallUniRetInst1
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0), ",
              [(PrintCallUni (i32 1))]>;
def PrintCallUniRetInst2
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1), ",
              [(PrintCallUni (i32 2))]>;
def PrintCallUniRetInst3
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1, retval2), ",
              [(PrintCallUni (i32 3))]>;
def PrintCallUniRetInst4
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1, retval2, retval3), ",
              [(PrintCallUni (i32 4))]>;
def PrintCallUniRetInst5
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1, retval2, retval3, retval4), ",
              [(PrintCallUni (i32 5))]>;
def PrintCallUniRetInst6
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
              [(PrintCallUni (i32 6))]>;
def PrintCallUniRetInst7
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
              [(PrintCallUni (i32 7))]>;
def PrintCallUniRetInst8
  : NVPTXInst<(outs), (ins),
              !strconcat("call.uni (retval0, retval1, retval2, retval3, retval4",
                         ", retval5, retval6, retval7), "),
              [(PrintCallUni (i32 8))]>;

def PrintCallUniNoRetInst
  : NVPTXInst<(outs), (ins),
              "call.uni ",
              [(PrintCallUni (i32 0))]>;

//===----------------------------------------------------------------------===//
// Loading call results (memory form and register form).
//===----------------------------------------------------------------------===//

def LoadParamMemI64 : LoadParamMemInst<Int64Regs, ".b64">;
def LoadParamMemI32 : LoadParamMemInst<Int32Regs, ".b32">;
def LoadParamMemI16 : LoadParamMemInst<Int16Regs, ".b16">;
def LoadParamMemI8  : LoadParamMemInst<Int8Regs, ".b8">;

// Disabled alternative that loaded 32 bits and narrowed with cvt:
//def LoadParamMemI16 : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
//                !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
//                "cvt.u16.u32\t$dst, temp_param_reg;"),
//                [(set Int16Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
//def LoadParamMemI8 : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
//                !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
//                "cvt.u16.u32\t$dst, temp_param_reg;"),
//                [(set Int8Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;

def LoadParamMemF32 : LoadParamMemInst<Float32Regs, ".f32">;
def LoadParamMemF64 : LoadParamMemInst<Float64Regs, ".f64">;

def LoadParamRegI64 : LoadParamRegInst<Int64Regs, ".b64">;
def LoadParamRegI32 : LoadParamRegInst<Int32Regs, ".b32">;
// The 16-bit and 8-bit register forms print a cvt.u16.u32 instead of a mov.
def LoadParamRegI16
  : NVPTXInst<(outs Int16Regs:$dst),
              (ins i32imm:$b),
              "cvt.u16.u32\t$dst, retval$b;",
              [(set Int16Regs:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
def LoadParamRegI8
  : NVPTXInst<(outs Int8Regs:$dst),
              (ins i32imm:$b),
              "cvt.u16.u32\t$dst, retval$b;",
              [(set Int8Regs:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;

def LoadParamRegF32 : LoadParamRegInst<Float32Regs, ".f32">;
def LoadParamRegF64 : LoadParamRegInst<Float64Regs, ".f64">;

//===----------------------------------------------------------------------===//
// Storing outgoing call arguments.
//===----------------------------------------------------------------------===//

def StoreParamI64 : StoreParamInst<Int64Regs, ".b64">;
def StoreParamI32 : StoreParamInst<Int32Regs, ".b32">;

def StoreParamI16
  : NVPTXInst<(outs),
              (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
              "st.param.b16\t[param$a+$b], $val;",
              [(StoreParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;

def StoreParamI8
  : NVPTXInst<(outs),
              (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
              "st.param.b8\t[param$a+$b], $val;",
              [(StoreParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;

// Widening stores: convert into the name `temp_param_reg`, then store 32 bits.
// `temp_param_reg` is not declared by these strings.
def StoreParamS32I16
  : NVPTXInst<(outs),
              (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t",
                         "st.param.b32\t[param$a+$b], temp_param_reg;"),
              [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
def StoreParamU32I16
  : NVPTXInst<(outs),
              (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
                         "st.param.b32\t[param$a+$b], temp_param_reg;"),
              [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;

def StoreParamU32I8
  : NVPTXInst<(outs),
              (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t",
                         "st.param.b32\t[param$a+$b], temp_param_reg;"),
              [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
def StoreParamS32I8
  : NVPTXInst<(outs),
              (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t",
                         "st.param.b32\t[param$a+$b], temp_param_reg;"),
              [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;

def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">;
def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">;

def MoveToParamI64 : MoveToParamInst<Int64Regs, ".b64">;
def MoveToParamI32 : MoveToParamInst<Int32Regs, ".b32">;
def MoveToParamF64 : MoveToParamInst<Float64Regs, ".f64">;
def MoveToParamF32 : MoveToParamInst<Float32Regs, ".f32">;
// Both narrow forms print cvt.u32.u16 followed by a 32-bit mov.
def MoveToParamI16
  : NVPTXInst<(outs),
              (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
                         "mov.b32\tparam$a, temp_param_reg;"),
              [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
def MoveToParamI8
  : NVPTXInst<(outs),
              (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
                         "mov.b32\tparam$a, temp_param_reg;"),
              [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;

//===----------------------------------------------------------------------===//
// Writing this function's own return value (func_retvalN).
//===----------------------------------------------------------------------===//

def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">;
def StoreRetvalI32 : StoreRetvalInst<Int32Regs, ".b32">;
def StoreRetvalI16 : StoreRetvalInst<Int16Regs, ".b16">;
def StoreRetvalI8  : StoreRetvalInst<Int8Regs, ".b8">;

// Disabled alternative that widened to 32 bits before storing:
//def StoreRetvalI16 : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a),
//     !strconcat("\{\n\t",
//             !strconcat(".reg .b32 temp_retval_reg;\n\t",
//     !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
//                "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
//     [(StoreRetval (i32 imm:$a), Int16Regs:$val)]>;
//def StoreRetvalI8 : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a),
//     !strconcat("\{\n\t",
//             !strconcat(".reg .b32 temp_retval_reg;\n\t",
//     !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
//                "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
//     [(StoreRetval (i32 imm:$a), Int8Regs:$val)]>;

def StoreRetvalF64 : StoreRetvalInst<Float64Regs, ".f64">;
def StoreRetvalF32 : StoreRetvalInst<Float32Regs, ".f32">;

def MoveRetvalI64 : MoveRetvalInst<Int64Regs, ".b64">;
def MoveRetvalI32 : MoveRetvalInst<Int32Regs, ".b32">;
def MoveRetvalI16 : MoveRetvalInst<Int16Regs, ".b16">;
def MoveRetvalI8  : MoveRetvalInst<Int8Regs, ".b8">;
def MoveRetvalF64 : MoveRetvalInst<Float64Regs, ".f64">;
def MoveRetvalF32 : MoveRetvalInst<Float32Regs, ".f32">;

def MoveToRetvalI64 : MoveToRetvalInst<Int64Regs, ".b64">;
def MoveToRetvalI32 : MoveToRetvalInst<Int32Regs, ".b32">;
def MoveToRetvalF64 : MoveToRetvalInst<Float64Regs, ".f64">;
def MoveToRetvalF32 : MoveToRetvalInst<Float32Regs, ".f32">;
def MoveToRetvalI16
  : NVPTXInst<(outs),
              (ins i32imm:$num, Int16Regs:$val),
              "cvt.u32.u16\tfunc_retval$num, $val;",
              [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>;
def MoveToRetvalI8
  : NVPTXInst<(outs),
              (ins i32imm:$num, Int8Regs:$val),
              "cvt.u32.u16\tfunc_retval$num, $val;",
              [(MoveToRetval (i32 imm:$num), Int8Regs:$val)]>;

//===----------------------------------------------------------------------===//
// Call argument list printers.
// CallArgEnd 1 prints ");" and CallArgEnd 0 prints ")" (no semicolon).
//===----------------------------------------------------------------------===//

def CallArgBeginInst
  : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1
  : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
def CallArgEndInst0
  : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
def RETURNInst
  : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;

// A non-final argument: the operand followed by ", ".
class CallArgInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs),
              (ins regclass:$a),
              "$a, ",
              [(CallArg (i32 0), regclass:$a)]>;

// The final argument: the operand with no trailing separator.
class LastCallArgInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs),
              (ins regclass:$a),
              "$a",
              [(LastCallArg (i32 0), regclass:$a)]>;

def CallArgI64 : CallArgInst<Int64Regs>;
def CallArgI32 : CallArgInst<Int32Regs>;
def CallArgI16 : CallArgInst<Int16Regs>;
def CallArgI8  : CallArgInst<Int8Regs>;

def CallArgF64 : CallArgInst<Float64Regs>;
def CallArgF32 : CallArgInst<Float32Regs>;

def LastCallArgI64 : LastCallArgInst<Int64Regs>;
def LastCallArgI32 : LastCallArgInst<Int32Regs>;
def LastCallArgI16 : LastCallArgInst<Int16Regs>;
def LastCallArgI8  : LastCallArgInst<Int8Regs>;

def LastCallArgF64 : LastCallArgInst<Float64Regs>;
def LastCallArgF32 : LastCallArgInst<Float32Regs>;

// Immediate arguments (first CallArg operand is 0).
def CallArgI32imm
  : NVPTXInst<(outs),
              (ins i32imm:$a),
              "$a, ",
              [(CallArg (i32 0), (i32 imm:$a))]>;
def LastCallArgI32imm
  : NVPTXInst<(outs),
              (ins i32imm:$a),
              "$a",
              [(LastCallArg (i32 0), (i32 imm:$a))]>;

// Arguments referring to a declared "param<N>" (first CallArg operand is 1).
def CallArgParam
  : NVPTXInst<(outs),
              (ins i32imm:$a),
              "param$a, ",
              [(CallArg (i32 1), (i32 imm:$a))]>;
def LastCallArgParam
  : NVPTXInst<(outs),
              (ins i32imm:$a),
              "param$a",
              [(LastCallArg (i32 1), (i32 imm:$a))]>;

// Call target printers: a global address, a 32-bit or a 64-bit register.
def CallVoidInst
  : NVPTXInst<(outs),
              (ins imem:$addr),
              "$addr, ",
              [(CallVoid (Wrapper tglobaladdr:$addr))]>;
def CallVoidInstReg
  : NVPTXInst<(outs),
              (ins Int32Regs:$addr),
              "$addr, ",
              [(CallVoid Int32Regs:$addr)]>;
def CallVoidInstReg64
  : NVPTXInst<(outs),
              (ins Int64Regs:$addr),
              "$addr, ",
              [(CallVoid Int64Regs:$addr)]>;
def PrototypeInst
  : NVPTXInst<(outs),
              (ins i32imm:$val),
              ", prototype_$val;",
              [(Prototype (i32 imm:$val))]>;

//===----------------------------------------------------------------------===//
// Declarations of retvalN / paramN.
// DeclareRet: first operand 1 prints a ".param" scalar, 2 prints a ".reg".
// DeclareScalarParam: last operand 0 prints a ".param", 1 prints a ".reg".
//===----------------------------------------------------------------------===//

def DeclareRetMemInst
  : NVPTXInst<(outs),
              (ins i32imm:$align, i32imm:$size, i32imm:$num),
              ".param .align $align .b8 retval$num[$size];",
              [(DeclareRetParam (i32 imm:$align), (i32 imm:$size),
                                (i32 imm:$num))]>;
def DeclareRetScalarInst
  : NVPTXInst<(outs),
              (ins i32imm:$size, i32imm:$num),
              ".param .b$size retval$num;",
              [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
def DeclareRetRegInst
  : NVPTXInst<(outs),
              (ins i32imm:$size, i32imm:$num),
              ".reg .b$size retval$num;",
              [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;

def DeclareParamInst
  : NVPTXInst<(outs),
              (ins i32imm:$align, i32imm:$a, i32imm:$size),
              ".param .align $align .b8 param$a[$size];",
              [(DeclareParam (i32 imm:$align), (i32 imm:$a),
                             (i32 imm:$size))]>;
def DeclareScalarParamInst
  : NVPTXInst<(outs),
              (ins i32imm:$a, i32imm:$size),
              ".param .b$size param$a;",
              [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
def DeclareScalarRegInst
  : NVPTXInst<(outs),
              (ins i32imm:$a, i32imm:$size),
              ".reg .b$size param$a;",
              [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;

//===----------------------------------------------------------------------===//
// MoveParam: register-to-register copy, "mov<asmstr> $dst, $src;".
//===----------------------------------------------------------------------===//

class MoveParamInst<NVPTXRegClass regclass, string asmstr>
  : NVPTXInst<(outs regclass:$dst),
              (ins regclass:$src),
              !strconcat("mov",
                         !strconcat(asmstr, "\t$dst, $src;")),
              [(set regclass:$dst, (MoveParam regclass:$src))]>;

def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
// The 16-bit and 8-bit forms print cvt.u16.u32 rather than a mov.
def MoveParamI16
  : NVPTXInst<(outs Int16Regs:$dst),
              (ins Int16Regs:$src),
              "cvt.u16.u32\t$dst, $src;",
              [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
def MoveParamI8
  : NVPTXInst<(outs Int8Regs:$dst),
              (ins Int8Regs:$src),
              "cvt.u16.u32\t$dst, $src;",
              [(set Int8Regs:$dst, (MoveParam Int8Regs:$src))]>;
def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;

// PseudoUseParam prints only a "//"-prefixed line naming $src.
class PseudoUseParamInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs),
              (ins regclass:$src),
              "// Pseudo use of $src",
              [(PseudoUseParam regclass:$src)]>;

def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
def PseudoUseParamI8  : PseudoUseParamInst<Int8Regs>;
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;


//
// Load / Store Handling
//
// LD defines six addressing variants per register class. None of them
// carries a selection pattern (the pattern list is empty).
//   _avar    : imem address
//   _areg    : 32-bit register address
//   _areg_64 : 64-bit register address
//   _ari     : 32-bit register + immediate offset
//   _ari_64  : 64-bit register + immediate offset
//   _asi     : imem address + immediate offset
multiclass LD<NVPTXRegClass regclass> {
  def _avar
    : NVPTXInst<(outs regclass:$dst),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),
                !strconcat(
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                  "$fromWidth \t$dst, [$addr];"),
                []>;
  def _areg
    : NVPTXInst<(outs regclass:$dst),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),
                !strconcat(
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                  "$fromWidth \t$dst, [$addr];"),
                []>;
  def _areg_64
    : NVPTXInst<(outs regclass:$dst),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),
                !strconcat(
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
                  " \t$dst, [$addr];"),
                []>;
  def _ari
    : NVPTXInst<(outs regclass:$dst),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
                     i32imm:$offset),
                !strconcat(
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                  "$fromWidth \t$dst, [$addr+$offset];"),
                []>;
  def _ari_64
    : NVPTXInst<(outs regclass:$dst),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
                     i32imm:$offset),
                !strconcat(
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth",
                  " \t$dst, [$addr+$offset];"),
                []>;
  def _asi
    : NVPTXInst<(outs regclass:$dst),
                (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
                     LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
                     i32imm:$offset),
                !strconcat(
                  "ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                  "$fromWidth \t$dst, [$addr+$offset];"),
                []>;
}

let mayLoad = 1, neverHasSideEffects = 1 in {
  defm LD_i8  : LD<Int8Regs>;
  defm LD_i16 : LD<Int16Regs>;
  defm LD_i32 : LD<Int32Regs>;
  defm LD_i64 : LD<Int64Regs>;
  defm LD_f32 : LD<Float32Regs>;
  defm LD_f64 : LD<Float64Regs>;
}

// ST mirrors LD: the same six addressing variants, with the stored value
// ($src) as the first input operand and no outputs.
multiclass ST<NVPTXRegClass regclass> {
  def _avar
    : NVPTXInst<(outs),
                (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                     LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                     imem:$addr),
                !strconcat(
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
                  " \t[$addr], $src;"),
                []>;
  def _areg
    : NVPTXInst<(outs),
                (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                     LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                     Int32Regs:$addr),
                !strconcat(
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
                  " \t[$addr], $src;"),
                []>;
  def _areg_64
    : NVPTXInst<(outs),
                (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                     LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                     Int64Regs:$addr),
                !strconcat(
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
                  "\t[$addr], $src;"),
                []>;
  def _ari
    : NVPTXInst<(outs),
                (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                     LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                     Int32Regs:$addr, i32imm:$offset),
                !strconcat(
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
                  " \t[$addr+$offset], $src;"),
                []>;
  def _ari_64
    : NVPTXInst<(outs),
                (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                     LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                     Int64Regs:$addr, i32imm:$offset),
                !strconcat(
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",
                  "\t[$addr+$offset], $src;"),
                []>;
  def _asi
    : NVPTXInst<(outs),
                (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp,
                     LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth,
                     imem:$addr, i32imm:$offset),
                !strconcat(
                  "st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth",
                  " \t[$addr+$offset], $src;"),
                []>;
}

let mayStore = 1, neverHasSideEffects = 1 in {
  defm ST_i8  : ST<Int8Regs>;
  defm ST_i16 : ST<Int16Regs>;
  defm ST_i32 : ST<Int32Regs>;
  defm ST_i64 : ST<Int64Regs>;
  defm ST_f32 : ST<Float32Regs>;
  defm ST_f64 : ST<Float64Regs>;
}

// The following is used only during and after vector elementization.
// Vector elementization happens at the machine instruction level, so the
// following instructions never appear in the DAG.
// Vector load producing two (_v2_*) or four (_v4_*) 'regclass' results.
// Each variant takes four LdStCode modifiers (volatile, address space,
// vector, sign) and the element width; the suffix selects the address form:
//   _avar    : imem address
//   _areg    : Int32Regs address
//   _areg_64 : Int64Regs address
//   _ari     : Int32Regs address + i32 immediate offset
//   _ari_64  : Int64Regs address + i32 immediate offset
//   _asi     : imem address + i32 immediate offset
// All patterns are empty ([]).
multiclass LD_VEC<NVPTXRegClass regclass> {
  // ---- two-element loads ----
  def _v2_avar : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr),
      !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"),
      []>;
  def _v2_areg : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr),
      !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"),
      []>;
  def _v2_areg_64 : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr),
      !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t{{$dst1, $dst2}}, [$addr];"),
      []>;
  def _v2_ari : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
      !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"),
      []>;
  def _v2_ari_64 : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
      !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"),
      []>;
  def _v2_asi : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr, i32imm:$offset),
      !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t{{$dst1, $dst2}}, [$addr+$offset];"),
      []>;

  // ---- four-element loads ----
  def _v4_avar : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr),
      !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"),
      []>;
  def _v4_areg : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr),
      !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"),
      []>;
  def _v4_areg_64 : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr),
      !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"),
      []>;
  def _v4_ari : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
      !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
      []>;
  def _v4_ari_64 : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
      !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
      []>;
  def _v4_asi : NVPTXInst<
      (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
      (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr, i32imm:$offset),
      !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
      []>;
}

// One LD_VEC instantiation per register class; flags are set explicitly
// because the instructions carry no patterns.
let mayLoad=1, neverHasSideEffects=1 in {
  defm LDV_i8  : LD_VEC<Int8Regs>;
  defm LDV_i16 : LD_VEC<Int16Regs>;
  defm LDV_i32 : LD_VEC<Int32Regs>;
  defm LDV_i64 : LD_VEC<Int64Regs>;
  defm LDV_f32 : LD_VEC<Float32Regs>;
  defm LDV_f64 : LD_VEC<Float64Regs>;
}

// Vector store of two (_v2_*) or four (_v4_*) 'regclass' values.  The
// address-form suffixes are the same as for LD_VEC.  The width operand is
// named $fromWidth here (the scalar ST multiclass calls it $toWidth).
// All patterns are empty ([]).
multiclass ST_VEC<NVPTXRegClass regclass> {
  // ---- two-element stores ----
  def _v2_avar : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr),
      !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t[$addr], {{$src1, $src2}};"),
      []>;
  def _v2_areg : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr),
      !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t[$addr], {{$src1, $src2}};"),
      []>;
  def _v2_areg_64 : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr),
      !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t[$addr], {{$src1, $src2}};"),
      []>;
  def _v2_ari : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
      !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"),
      []>;
  def _v2_ari_64 : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
      !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"),
      []>;
  def _v2_asi : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr, i32imm:$offset),
      !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t[$addr+$offset], {{$src1, $src2}};"),
      []>;

  // ---- four-element stores ----
  def _v4_avar : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr),
      !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"),
      []>;
  def _v4_areg : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr),
      !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"),
      []>;
  def _v4_areg_64 : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr),
      !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t[$addr], {{$src1, $src2, $src3, $src4}};"),
      []>;
  def _v4_ari : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
      !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
      []>;
  def _v4_ari_64 : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
      !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
      []>;
  def _v4_asi : NVPTXInst<
      (outs),
      (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
           LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
           i32imm:$fromWidth, imem:$addr, i32imm:$offset),
      !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}",
                 "$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
      []>;
}

// One ST_VEC instantiation per register class; flags are set explicitly
// because the instructions carry no patterns.
let mayStore=1, neverHasSideEffects=1 in {
  defm STV_i8  : ST_VEC<Int8Regs>;
  defm STV_i16 : ST_VEC<Int16Regs>;
  defm STV_i32 : ST_VEC<Int32Regs>;
  defm STV_i64 : ST_VEC<Int64Regs>;
  defm STV_f32 : ST_VEC<Float32Regs>;
  defm STV_f64 : ST_VEC<Float64Regs>;
}


//---- Conversion ----

// Integer -> floating-point conversions selected for 'OpNode'.
//   OpStr : signedness letter spliced into the source type ("s" or "u").
// The i1 sources are lowered with selp (1.0 / 0.0); everything else uses
// cvt.rn.
multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
// FIXME: need to add f16 support
//  def CVTf16i8 :
//    NVPTXInst<(outs Float16Regs:$d), (ins Int8Regs:$a),
//              !strconcat("cvt.rn.f16.", OpStr, "8 \t$d, $a;"),
//              [(set Float16Regs:$d, (OpNode Int8Regs:$a))]>;
//  def CVTf16i16 :
//    NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a),
//              !strconcat("cvt.rn.f16.", OpStr, "16 \t$d, $a;"),
//              [(set Float16Regs:$d, (OpNode Int16Regs:$a))]>;
//  def CVTf16i32 :
//    NVPTXInst<(outs Float16Regs:$d), (ins Int32Regs:$a),
//              !strconcat("cvt.rn.f16.", OpStr, "32 \t$d, $a;"),
//              [(set Float16Regs:$d, (OpNode Int32Regs:$a))]>;
//  def CVTf16i64 :
//    NVPTXInst<(outs Float16Regs:$d), (ins Int64Regs:$a),
//              !strconcat("cvt.rn.f16.", OpStr, "64 \t$d, $a;"),
//              [(set Float16Regs:$d, (OpNode Int64Regs:$a))]>;

  // ---- to f32 ----
  def CVTf32i1 : NVPTXInst<
      (outs Float32Regs:$d), (ins Int1Regs:$a),
      "selp.f32 \t$d, 1.0, 0.0, $a;",
      [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>;
  def CVTf32i8 : NVPTXInst<
      (outs Float32Regs:$d), (ins Int8Regs:$a),
      !strconcat("cvt.rn.f32.", OpStr, "8 \t$d, $a;"),
      [(set Float32Regs:$d, (OpNode Int8Regs:$a))]>;
  def CVTf32i16 : NVPTXInst<
      (outs Float32Regs:$d), (ins Int16Regs:$a),
      !strconcat("cvt.rn.f32.", OpStr, "16 \t$d, $a;"),
      [(set Float32Regs:$d, (OpNode Int16Regs:$a))]>;
  def CVTf32i32 : NVPTXInst<
      (outs Float32Regs:$d), (ins Int32Regs:$a),
      !strconcat("cvt.rn.f32.", OpStr, "32 \t$d, $a;"),
      [(set Float32Regs:$d, (OpNode Int32Regs:$a))]>;
  def CVTf32i64: NVPTXInst<
      (outs Float32Regs:$d), (ins Int64Regs:$a),
      !strconcat("cvt.rn.f32.", OpStr, "64 \t$d, $a;"),
      [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;

  // ---- to f64 ----
  def CVTf64i1 : NVPTXInst<
      (outs Float64Regs:$d), (ins Int1Regs:$a),
      "selp.f64 \t$d, 1.0, 0.0, $a;",
      [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>;
  def CVTf64i8 : NVPTXInst<
      (outs Float64Regs:$d), (ins Int8Regs:$a),
      !strconcat("cvt.rn.f64.", OpStr, "8 \t$d, $a;"),
      [(set Float64Regs:$d, (OpNode Int8Regs:$a))]>;
  def CVTf64i16 : NVPTXInst<
      (outs Float64Regs:$d), (ins Int16Regs:$a),
      !strconcat("cvt.rn.f64.", OpStr, "16 \t$d, $a;"),
      [(set Float64Regs:$d, (OpNode Int16Regs:$a))]>;
  def CVTf64i32 : NVPTXInst<
      (outs Float64Regs:$d), (ins Int32Regs:$a),
      !strconcat("cvt.rn.f64.", OpStr, "32 \t$d, $a;"),
      [(set Float64Regs:$d, (OpNode Int32Regs:$a))]>;
  def CVTf64i64: NVPTXInst<
      (outs Float64Regs:$d), (ins Int64Regs:$a),
      !strconcat("cvt.rn.f64.", OpStr, "64 \t$d, $a;"),
      [(set Float64Regs:$d, (OpNode Int64Regs:$a))]>;
}

defm Sint_to_fp : CVT_INT_TO_FP <"s", sint_to_fp>;
defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>;

// Floating-point -> integer conversions (cvt.rzi) selected for 'OpNode'.
//   OpStr : signedness letter spliced into the destination type.
// The f32 sources have an additional .ftz form guarded by doF32FTZ, defined
// ahead of the plain form.  Int8Regs destinations use the 16-bit
// destination type in the emitted text ("16.f32" / "16.f64").
multiclass CVT_FP_TO_INT <string OpStr, SDNode OpNode> {
// FIXME: need to add f16 support
//  def CVTi8f16:
//    NVPTXInst<(outs Int8Regs:$d), (ins Float16Regs:$a),
//              !strconcat("cvt.rzi.", OpStr, "8.f16 $d, $a;"),
//              [(set Int8Regs:$d, (OpNode Float16Regs:$a))]>;
  def CVTi8f32_ftz: NVPTXInst<
      (outs Int8Regs:$d), (ins Float32Regs:$a),
      !strconcat("cvt.rzi.ftz.", OpStr, "16.f32 \t$d, $a;"),
      [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>,
      Requires<[doF32FTZ]>;
  def CVTi8f32: NVPTXInst<
      (outs Int8Regs:$d), (ins Float32Regs:$a),
      !strconcat("cvt.rzi.", OpStr, "16.f32 \t$d, $a;"),
      [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>;
  def CVTi8f64: NVPTXInst<
      (outs Int8Regs:$d), (ins Float64Regs:$a),
      !strconcat("cvt.rzi.", OpStr, "16.f64 \t$d, $a;"),
      [(set Int8Regs:$d, (OpNode Float64Regs:$a))]>;

// FIXME: need to add f16 support
//  def CVTi16f16:
//    NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a),
//              !strconcat("cvt.rzi.", OpStr, "16.f16 \t$d, $a;"),
//              [(set Int16Regs:$d, (OpNode Float16Regs:$a))]>;
  def CVTi16f32_ftz: NVPTXInst<
      (outs Int16Regs:$d), (ins Float32Regs:$a),
      !strconcat("cvt.rzi.ftz.", OpStr, "16.f32 \t$d, $a;"),
      [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>,
      Requires<[doF32FTZ]>;
  def CVTi16f32: NVPTXInst<
      (outs Int16Regs:$d), (ins Float32Regs:$a),
      !strconcat("cvt.rzi.", OpStr, "16.f32 \t$d, $a;"),
      [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>;
  def CVTi16f64: NVPTXInst<
      (outs Int16Regs:$d), (ins Float64Regs:$a),
      !strconcat("cvt.rzi.", OpStr, "16.f64 \t$d, $a;"),
      [(set Int16Regs:$d, (OpNode Float64Regs:$a))]>;

// FIXME: need to add f16 support
//  def CVTi32f16:
//    NVPTXInst<(outs Int32Regs:$d), (ins Float16Regs:$a),
//              !strconcat("cvt.rzi.", OpStr, "32.f16 \t$d, $a;"),
//              [(set Int32Regs:$d, (OpNode Float16Regs:$a))]>;
  def CVTi32f32_ftz: NVPTXInst<
      (outs Int32Regs:$d), (ins Float32Regs:$a),
      !strconcat("cvt.rzi.ftz.", OpStr, "32.f32 \t$d, $a;"),
      [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>,
      Requires<[doF32FTZ]>;
  def CVTi32f32: NVPTXInst<
      (outs Int32Regs:$d), (ins Float32Regs:$a),
      !strconcat("cvt.rzi.", OpStr, "32.f32 \t$d, $a;"),
      [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>;
  def CVTi32f64: NVPTXInst<
      (outs Int32Regs:$d), (ins Float64Regs:$a),
      !strconcat("cvt.rzi.", OpStr, "32.f64 \t$d, $a;"),
      [(set Int32Regs:$d, (OpNode Float64Regs:$a))]>;

// FIXME: need to add f16 support
//  def CVTi64f16:
//    NVPTXInst<(outs Int64Regs:$d), (ins Float16Regs:$a),
//              !strconcat("cvt.rzi.", OpStr, "64.f16 \t$d, $a;"),
//              [(set Int64Regs:$d, (OpNode Float16Regs:$a))]>;
  def CVTi64f32_ftz: NVPTXInst<
      (outs Int64Regs:$d), (ins Float32Regs:$a),
      !strconcat("cvt.rzi.ftz.", OpStr, "64.f32 \t$d, $a;"),
      [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>,
      Requires<[doF32FTZ]>;
  def CVTi64f32: NVPTXInst<
      (outs Int64Regs:$d), (ins Float32Regs:$a),
      !strconcat("cvt.rzi.", OpStr, "64.f32 \t$d, $a;"),
      [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>;
  def CVTi64f64: NVPTXInst<
      (outs Int64Regs:$d), (ins Float64Regs:$a),
      !strconcat("cvt.rzi.", OpStr, "64.f64 \t$d, $a;"),
      [(set Int64Regs:$d, (OpNode Float64Regs:$a))]>;
}

defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>;
defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>;

// Extension of an i1 predicate where "true" becomes 1 (selp ..., 1, 0).
// The Int8Regs destination uses the same selp.u16 text as Int16Regs.
multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> {
  def ext1to8: NVPTXInst<
      (outs Int8Regs:$d), (ins Int1Regs:$a),
      "selp.u16 \t$d, 1, 0, $a;",
      [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
  def ext1to16: NVPTXInst<
      (outs Int16Regs:$d), (ins Int1Regs:$a),
      "selp.u16 \t$d, 1, 0, $a;",
      [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
  def ext1to32: NVPTXInst<
      (outs Int32Regs:$d), (ins Int1Regs:$a),
      "selp.u32 \t$d, 1, 0, $a;",
      [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
  def ext1to64: NVPTXInst<
      (outs Int64Regs:$d), (ins Int1Regs:$a),
      "selp.u64 \t$d, 1, 0, $a;",
      [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
}

// Extension of an i1 predicate where "true" becomes -1 (selp ..., -1, 0).
// The Int8Regs destination uses the same selp.s16 text as Int16Regs.
multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> {
  def ext1to8: NVPTXInst<
      (outs Int8Regs:$d), (ins Int1Regs:$a),
      "selp.s16 \t$d, -1, 0, $a;",
      [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
  def ext1to16: NVPTXInst<
      (outs Int16Regs:$d), (ins Int1Regs:$a),
      "selp.s16 \t$d, -1, 0, $a;",
      [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
  def ext1to32: NVPTXInst<
      (outs Int32Regs:$d), (ins Int1Regs:$a),
      "selp.s32 \t$d, -1, 0, $a;",
      [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
  def ext1to64: NVPTXInst<
      (outs Int64Regs:$d), (ins Int1Regs:$a),
      "selp.s64 \t$d, -1, 0, $a;",
      [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
}

// Widening integer conversions emitted as cvt.<OpStr><to>.<OpStr><from>.
//   OpStr : signedness letter used for both the destination and source type.
multiclass INT_EXTEND <string OpStr, SDNode OpNode> {
  // All Int8Regs are emitted as 16-bit registers in ptx.
  // And there is no selp.u8 in ptx.
  def ext8to16: NVPTXInst<
      (outs Int16Regs:$d), (ins Int8Regs:$a),
      !strconcat("cvt.", OpStr, "16.", OpStr, "8 \t$d, $a;"),
      [(set Int16Regs:$d, (OpNode Int8Regs:$a))]>;
  def ext8to32: NVPTXInst<
      (outs Int32Regs:$d), (ins Int8Regs:$a),
      !strconcat("cvt.", OpStr, "32.", OpStr, "8 \t$d, $a;"),
      [(set Int32Regs:$d, (OpNode Int8Regs:$a))]>;
  def ext8to64: NVPTXInst<
      (outs Int64Regs:$d), (ins Int8Regs:$a),
      !strconcat("cvt.", OpStr, "64.", OpStr, "8 \t$d, $a;"),
      [(set Int64Regs:$d, (OpNode Int8Regs:$a))]>;
  def ext16to32: NVPTXInst<
      (outs Int32Regs:$d), (ins Int16Regs:$a),
      !strconcat("cvt.", OpStr, "32.", OpStr, "16 \t$d, $a;"),
      [(set Int32Regs:$d, (OpNode Int16Regs:$a))]>;
  def ext16to64: NVPTXInst<
      (outs Int64Regs:$d), (ins Int16Regs:$a),
      !strconcat("cvt.", OpStr, "64.", OpStr, "16 \t$d, $a;"),
      [(set Int64Regs:$d, (OpNode Int16Regs:$a))]>;
  def ext32to64: NVPTXInst<
      (outs Int64Regs:$d), (ins Int32Regs:$a),
      !strconcat("cvt.", OpStr, "64.", OpStr, "32 \t$d, $a;"),
      [(set Int64Regs:$d, (OpNode Int32Regs:$a))]>;
}

// anyext reuses the unsigned (zero-extending) forms.
defm Sint_extend_1 : INT_EXTEND_SIGNED_1<sext>;
defm Zint_extend_1 : INT_EXTEND_UNSIGNED_1<zext>;
defm Aint_extend_1 : INT_EXTEND_UNSIGNED_1<anyext>;

defm Sint_extend : INT_EXTEND <"s", sext>;
defm Zint_extend : INT_EXTEND <"u", zext>;
defm Aint_extend : INT_EXTEND <"u", anyext>;

// Builds the asm text for truncation to i1: inside a brace scope, declare a
// temporary of type 'sz', AND the source with 1, then compare the result
// against 1 into the predicate destination.
// NOTE(review): the compare is emitted as "setp<sz>.eq" (type before the
// comparison operator); the PTX ISA documents setp.CmpOp.type -- confirm the
// assembler accepts this ordering before relying on it.
class TRUNC_to1_asm<string sz> {
  string s = !strconcat("{{\n\t",
                        ".reg ", sz, " temp;\n\t",
                        "and", sz, "\t temp, $a, 1;\n\t",
                        "setp", sz, ".eq \t $d, temp, 1;\n\t}}");
}

// Integer truncations between the multi-bit register classes (plain cvt).
def TRUNC_64to32 : NVPTXInst<
    (outs Int32Regs:$d), (ins Int64Regs:$a),
    "cvt.u32.u64 \t$d, $a;",
    [(set Int32Regs:$d, (trunc Int64Regs:$a))]>;
def TRUNC_64to16 : NVPTXInst<
    (outs Int16Regs:$d), (ins Int64Regs:$a),
    "cvt.u16.u64 \t$d, $a;",
    [(set Int16Regs:$d, (trunc Int64Regs:$a))]>;
def TRUNC_64to8 : NVPTXInst<
    (outs Int8Regs:$d), (ins Int64Regs:$a),
    "cvt.u8.u64 \t$d, $a;",
    [(set Int8Regs:$d, (trunc Int64Regs:$a))]>;
def TRUNC_32to16 : NVPTXInst<
    (outs Int16Regs:$d), (ins Int32Regs:$a),
    "cvt.u16.u32 \t$d, $a;",
    [(set Int16Regs:$d, (trunc Int32Regs:$a))]>;
def TRUNC_32to8 : NVPTXInst<
    (outs Int8Regs:$d), (ins Int32Regs:$a),
    "cvt.u8.u32 \t$d, $a;",
    [(set Int8Regs:$d, (trunc Int32Regs:$a))]>;
def TRUNC_16to8 : NVPTXInst<
    (outs Int8Regs:$d), (ins Int16Regs:$a),
    "cvt.u8.u16 \t$d, $a;",
    [(set Int8Regs:$d, (trunc Int16Regs:$a))]>;

// Truncations to i1 use the and/setp sequence from TRUNC_to1_asm.  The
// Int8Regs source is handled with the .b16 text, like Int16Regs.
def TRUNC_64to1 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    TRUNC_to1_asm<".b64">.s,
    [(set Int1Regs:$d, (trunc Int64Regs:$a))]>;
def TRUNC_32to1 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    TRUNC_to1_asm<".b32">.s,
    [(set Int1Regs:$d, (trunc Int32Regs:$a))]>;
def TRUNC_16to1 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int16Regs:$a),
    TRUNC_to1_asm<".b16">.s,
    [(set Int1Regs:$d, (trunc Int16Regs:$a))]>;
def TRUNC_8to1 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int8Regs:$a),
    TRUNC_to1_asm<".b16">.s,
    [(set Int1Regs:$d, (trunc Int8Regs:$a))]>;

// Select instructions
// A select whose condition lives in an Int32Regs register is rewritten to
// the matching SELECT*rr with the condition truncated to i1 (TRUNC_32to1).
def : Pat<(select Int32Regs:$pred, Int8Regs:$a, Int8Regs:$b),
          (SELECTi8rr Int8Regs:$a, Int8Regs:$b,
                      (TRUNC_32to1 Int32Regs:$pred))>;
def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
          (SELECTi16rr Int16Regs:$a, Int16Regs:$b,
                       (TRUNC_32to1 Int32Regs:$pred))>;
def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
          (SELECTi32rr Int32Regs:$a, Int32Regs:$b,
                       (TRUNC_32to1 Int32Regs:$pred))>;
def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
          (SELECTi64rr Int64Regs:$a, Int64Regs:$b,
                       (TRUNC_32to1 Int32Regs:$pred))>;
def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
          (SELECTf32rr Float32Regs:$a, Float32Regs:$b,
                       (TRUNC_32to1 Int32Regs:$pred))>;
def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
          (SELECTf64rr Float64Regs:$a, Float64Regs:$b,
                       (TRUNC_32to1 Int32Regs:$pred))>;

// Same-size bitcast between register classes, emitted as mov.b<SzStr>.
class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
                   NVPTXRegClass regclassOut>
  : NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
              !strconcat("mov.b", SzStr, " \t $d, $a;"),
              [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;

def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;

// pack a set of smaller int registers to a larger int register
// The i8 variants first copy each source into a scoped .b8 temporary
// (%t<N>) and then pack the temporaries with a vector mov.
def V4I8toI32 : NVPTXInst<
    (outs Int32Regs:$d),
    (ins Int8Regs:$s1, Int8Regs:$s2, Int8Regs:$s3, Int8Regs:$s4),
    !strconcat("{{\n\t.reg .b8\t%t<4>;",
               "\n\tcvt.u8.u8\t%t0, $s1;",
               "\n\tcvt.u8.u8\t%t1, $s2;",
               "\n\tcvt.u8.u8\t%t2, $s3;",
               "\n\tcvt.u8.u8\t%t3, $s4;",
               "\n\tmov.b32\t$d, {%t0, %t1, %t2, %t3};\n\t}}"),
    []>;
def V4I16toI64 : NVPTXInst<
    (outs Int64Regs:$d),
    (ins Int16Regs:$s1, Int16Regs:$s2, Int16Regs:$s3, Int16Regs:$s4),
    "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
    []>;
def V2I8toI16 : NVPTXInst<
    (outs Int16Regs:$d),
    (ins Int8Regs:$s1, Int8Regs:$s2),
    !strconcat("{{\n\t.reg .b8\t%t<2>;",
               "\n\tcvt.u8.u8\t%t0, $s1;",
               "\n\tcvt.u8.u8\t%t1, $s2;",
               "\n\tmov.b16\t$d, {%t0, %t1};\n\t}}"),
    []>;
def V2I16toI32 : NVPTXInst<
    (outs Int32Regs:$d),
    (ins Int16Regs:$s1, Int16Regs:$s2),
    "mov.b32\t$d, {{$s1, $s2}};",
    []>;
def V2I32toI64 : NVPTXInst<
    (outs Int64Regs:$d),
    (ins Int32Regs:$s1, Int32Regs:$s2),
    "mov.b64\t$d, {{$s1, $s2}};",
    []>;
def V2F32toF64 : NVPTXInst<
    (outs Float64Regs:$d),
    (ins Float32Regs:$s1, Float32Regs:$s2),
    "mov.b64\t$d, {{$s1, $s2}};",
    []>;

// unpack a larger int register to a set of smaller int registers
// The i8 variants unpack into scoped .b8 temporaries (%t<N>) first and then
// copy each temporary to its destination.
def I32toV4I8 : NVPTXInst<
    (outs Int8Regs:$d1, Int8Regs:$d2, Int8Regs:$d3, Int8Regs:$d4),
    (ins Int32Regs:$s),
    !strconcat("{{\n\t.reg .b8\t%t<4>;",
               "\n\tmov.b32\t{%t0, %t1, %t2, %t3}, $s;",
               "\n\tcvt.u8.u8\t$d1, %t0;",
               "\n\tcvt.u8.u8\t$d2, %t1;",
               "\n\tcvt.u8.u8\t$d3, %t2;",
               "\n\tcvt.u8.u8\t$d4, %t3;\n\t}}"),
    []>;
def I64toV4I16 : NVPTXInst<
    (outs Int16Regs:$d1, Int16Regs:$d2, Int16Regs:$d3, Int16Regs:$d4),
    (ins Int64Regs:$s),
    "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
    []>;
def I16toV2I8 : NVPTXInst<
    (outs Int8Regs:$d1, Int8Regs:$d2),
    (ins Int16Regs:$s),
    !strconcat("{{\n\t.reg .b8\t%t<2>;",
               "\n\tmov.b16\t{%t0, %t1}, $s;",
               "\n\tcvt.u8.u8\t$d1, %t0;",
               "\n\tcvt.u8.u8\t$d2, %t1;\n\t}}"),
    []>;
def I32toV2I16 : NVPTXInst<
    (outs Int16Regs:$d1, Int16Regs:$d2),
    (ins Int32Regs:$s),
    "mov.b32\t{{$d1, $d2}}, $s;",
    []>;
def I64toV2I32 : NVPTXInst<
    (outs Int32Regs:$d1, Int32Regs:$d2),
    (ins Int64Regs:$s),
    "mov.b64\t{{$d1, $d2}}, $s;",
    []>;
def F64toV2F32 : NVPTXInst<
    (outs Float32Regs:$d1, Float32Regs:$d2),
    (ins Float64Regs:$s),
    "mov.b64\t{{$d1, $d2}}, $s;",
    []>;

// f64 -> f32 rounding and f32 -> f64 extension.  The .ftz forms are guarded
// by doF32FTZ and are defined ahead of the plain forms.
def FPRound_ftz : NVPTXInst<
    (outs Float32Regs:$d), (ins Float64Regs:$a),
    "cvt.rn.ftz.f32.f64 \t$d, $a;",
    [(set Float32Regs:$d, (fround Float64Regs:$a))]>,
    Requires<[doF32FTZ]>;

def FPRound : NVPTXInst<
    (outs Float32Regs:$d), (ins Float64Regs:$a),
    "cvt.rn.f32.f64 \t$d, $a;",
    [(set Float32Regs:$d, (fround Float64Regs:$a))]>;

def FPExtend_ftz : NVPTXInst<
    (outs Float64Regs:$d), (ins Float32Regs:$a),
    "cvt.ftz.f64.f32 \t$d, $a;",
    [(set Float64Regs:$d, (fextend Float32Regs:$a))]>,
    Requires<[doF32FTZ]>;

def FPExtend : NVPTXInst<
    (outs Float64Regs:$d), (ins Float32Regs:$a),
    "cvt.f64.f32 \t$d, $a;",
    [(set Float64Regs:$d, (fextend Float32Regs:$a))]>;

def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
                     [SDNPHasChain, SDNPOptInGlue]>;

//-----------------------------------
// Control-flow
//-----------------------------------

let isTerminator=1 in {
  let isReturn=1, isBarrier=1 in
    def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;

  // Conditional branches: CBranch branches when $a is set, CBranchOther
  // prints the negated guard (@!$a) and has no pattern of its own.
  let isBranch=1 in {
    def CBranch : NVPTXInst<
        (outs), (ins Int1Regs:$a, brtarget:$target),
        "@$a bra \t$target;",
        [(brcond Int1Regs:$a, bb:$target)]>;
    def CBranchOther : NVPTXInst<
        (outs), (ins Int1Regs:$a, brtarget:$target),
        "@!$a bra \t$target;",
        []>;
  }

  // Unconditional branch.
  let isBranch=1, isBarrier=1 in
    def GOTO : NVPTXInst<
        (outs), (ins brtarget:$target),
        "bra.uni \t$target;",
        [(br bb:$target)]>;
}

// A branch on an Int32Regs condition compares the value against 0
// (ISetUNEi32ri_p) to obtain the predicate for CBranch.
def : Pat<(brcond Int32Regs:$a, bb:$target),
          (CBranch (ISetUNEi32ri_p Int32Regs:$a, 0), bb:$target)>;

// SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
// conditional branch if the target block is the next block so that the code
// can fall through to the target block.
// The inversion is done by 'xor condition, 1', which will be translated to
// (setne condition, -1).
// Since ptx supports '@!pred bra target', we should use it.
// Inverted conditional branch: a brcond on (setne $a, -1) is selected to
// CBranchOther, which prints the negated guard.
def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
          (CBranchOther Int1Regs:$a, bb:$target)>;

// Call
// Type profiles and SDNodes for the call-sequence markers.
def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
def SDT_NVPTXCallSeqEnd
  : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>;

def callseq_start
  : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
           [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def callseq_end
  : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPSideEffect]>;

// The call node itself: no results, one i32 operand (the callee).
def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def call
  : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def calltarget : Operand<i32>;

let isCall=1 in
  def CALL : NVPTXInst<
      (outs), (ins calltarget:$dst),
      "call \t$dst, (1);",
      []>;

// Direct calls to a global address or an external symbol both use CALL.
def : Pat<(call tglobaladdr:$dst),
          (CALL tglobaladdr:$dst)>;
def : Pat<(call texternalsym:$dst),
          (CALL texternalsym:$dst)>;

// Pseudo instructions.
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
  : NVPTXInst<outs, ins, asmstr, pattern>;

// @TODO: We use some tricks here to emit curly braces.  Can we clean this up
// a bit without TableGen modifications?
def Callseq_Start : NVPTXInst<
    (outs), (ins i32imm:$amt),
    "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
    [(callseq_start timm:$amt)]>;
def Callseq_End : NVPTXInst<
    (outs), (ins i32imm:$amt1, i32imm:$amt2),
    "\n\t//{{\n\t}}// Callseq End $amt1",
    [(callseq_end timm:$amt1, timm:$amt2)]>;

// trap instruction

def trapinst : NVPTXInst<
    (outs), (ins),
    "trap;",
    [(trap)]>;

include "NVPTXIntrinsics.td"


//-----------------------------------
// Notes
//-----------------------------------
// BSWAP is currently expanded. The following would be a more efficient
// lowering:
// - for < sm_20, use vector scalar mov, as tesla supports native 16-bit
//   registers
// - for sm_20, use pmpt [sic -- presumably the PTX 'prmt' byte-permute
//   instruction; TODO confirm] (use vector scalar mov to get the pack and
//   unpack). sm_20 supports native 32-bit registers, but not native 16-bit
//   registers.