1//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// R600 Tablegen instruction definitions 11// 12//===----------------------------------------------------------------------===// 13 14include "R600Intrinsics.td" 15 16class InstR600 <bits<32> inst, dag outs, dag ins, string asm, list<dag> pattern, 17 InstrItinClass itin> 18 : AMDGPUInst <outs, ins, asm, pattern> { 19 20 field bits<32> Inst; 21 bit Trig = 0; 22 bit Op3 = 0; 23 bit isVector = 0; 24 bits<2> FlagOperandIdx = 0; 25 26 let Inst = inst; 27 let Namespace = "AMDGPU"; 28 let OutOperandList = outs; 29 let InOperandList = ins; 30 let AsmString = asm; 31 let Pattern = pattern; 32 let Itinerary = itin; 33 34 let TSFlags{4} = Trig; 35 let TSFlags{5} = Op3; 36 37 // Vector instructions are instructions that must fill all slots in an 38 // instruction group 39 let TSFlags{6} = isVector; 40 let TSFlags{8-7} = FlagOperandIdx; 41} 42 43class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> : 44 AMDGPUInst <outs, ins, asm, pattern> 45{ 46 field bits<64> Inst; 47 48 let Namespace = "AMDGPU"; 49} 50 51def MEMxi : Operand<iPTR> { 52 let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index); 53} 54 55def MEMrr : Operand<iPTR> { 56 let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index); 57} 58 59def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>; 60def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>; 61def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>; 62 63class R600_ALU { 64 65 bits<7> DST_GPR = 0; 66 bits<9> SRC0_SEL = 0; 67 bits<1> SRC0_NEG = 0; 68 bits<9> SRC1_SEL = 0; 69 bits<1> SRC1_NEG = 0; 70 bits<1> CLAMP = 0; 71 72} 73 74def R600_Pred : PredicateOperand<i32, (ops R600_Predicate), 75 (ops PRED_SEL_OFF)>; 76 77 78class R600_1OP <bits<32> inst, string opName, list<dag> pattern, 79 InstrItinClass itin = AnyALU> : 80 InstR600 <inst, 81 (outs R600_Reg32:$dst), 82 (ins R600_Reg32:$src, R600_Pred:$p, variable_ops), 83 !strconcat(opName, " $dst, $src ($p)"), 84 pattern, 85 itin 86 >; 87 88class R600_2OP <bits<32> inst, string opName, list<dag> pattern, 89 InstrItinClass itin = AnyALU> : 90 InstR600 <inst, 91 (outs R600_Reg32:$dst), 92 (ins R600_Reg32:$src0, R600_Reg32:$src1,R600_Pred:$p, variable_ops), 93 !strconcat(opName, " $dst, $src0, $src1"), 94 pattern, 95 itin 96 >; 97 98class R600_3OP <bits<32> inst, string opName, list<dag> pattern, 99 InstrItinClass itin = AnyALU> : 100 InstR600 <inst, 101 (outs R600_Reg32:$dst), 102 (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2,R600_Pred:$p, variable_ops), 103 !strconcat(opName, " $dst, $src0, $src1, $src2"), 104 pattern, 105 itin>{ 106 107 let Op3 = 1; 108 } 109 110 111 112def PRED_X : InstR600 <0, (outs R600_Predicate_Bit:$dst), 113 (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags), 114 "PRED $dst, $src0, $src1", 115 [], NullALU> 116{ 117 let DisableEncoding = "$src0"; 118 field bits<32> Inst; 119 bits<32> src1; 120 121 let Inst = src1; 122 let FlagOperandIdx = 3; 123} 124 125let isTerminator = 1, isBranch = 1, isPseudo = 1 in { 126def JUMP : InstR600 <0x10, 127 (outs), 128 (ins brtarget:$target, R600_Pred:$p), 129 "JUMP $target ($p)", 130 [], AnyALU 131 >; 132} 133 134class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern, 135 InstrItinClass itin = VecALU> : 136 InstR600 <inst, 137 (outs R600_Reg32:$dst), 138 ins, 139 asm, 140 pattern, 141 itin 142 143 >; 144 145class R600_TEX <bits<32> inst, string opName, list<dag> pattern, 146 InstrItinClass itin = AnyALU> : 147 InstR600 <inst, 148 (outs R600_Reg128:$dst), 149 (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2), 150 !strconcat(opName, "$dst, $src0, $src1, $src2"), 151 pattern, 152 itin 153 >; 154 155def TEX_SHADOW : PatLeaf< 156 (imm), 157 [{uint32_t TType = (uint32_t)N->getZExtValue(); 158 return (TType >= 6 && TType <= 8) || TType == 11 || TType == 12; 159 }] 160>; 161 162class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs, 163 dag ins, string asm, list<dag> pattern> : 164 InstR600ISA <outs, ins, asm, pattern> 165{ 166 bits<7> RW_GPR; 167 bits<7> INDEX_GPR; 168 169 bits<2> RIM; 170 bits<2> TYPE; 171 bits<1> RW_REL; 172 bits<2> ELEM_SIZE; 173 174 bits<12> ARRAY_SIZE; 175 bits<4> COMP_MASK; 176 bits<4> BURST_COUNT; 177 bits<1> VPM; 178 bits<1> eop; 179 bits<1> MARK; 180 bits<1> BARRIER; 181 182 // CF_ALLOC_EXPORT_WORD0_RAT 183 let Inst{3-0} = rat_id; 184 let Inst{9-4} = rat_inst; 185 let Inst{10} = 0; // Reserved 186 let Inst{12-11} = RIM; 187 let Inst{14-13} = TYPE; 188 let Inst{21-15} = RW_GPR; 189 let Inst{22} = RW_REL; 190 let Inst{29-23} = INDEX_GPR; 191 let Inst{31-30} = ELEM_SIZE; 192 193 // CF_ALLOC_EXPORT_WORD1_BUF 194 let Inst{43-32} = ARRAY_SIZE; 195 let Inst{47-44} = COMP_MASK; 196 let Inst{51-48} = BURST_COUNT; 197 let Inst{52} = VPM; 198 let Inst{53} = eop; 199 let Inst{61-54} = cf_inst; 200 let Inst{62} = MARK; 201 let Inst{63} = BARRIER; 202} 203 204def load_param : PatFrag<(ops node:$ptr), 205 (load node:$ptr), 206 [{ 207 const Value *Src = cast<LoadSDNode>(N)->getSrcValue(); 208 if (Src) { 209 PointerType * PT = dyn_cast<PointerType>(Src->getType()); 210 return PT && PT->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS; 211 } 212 return false; 213 }]>; 214 215def isR600 : Predicate<"Subtarget.device()" 216 "->getGeneration() == AMDGPUDeviceInfo::HD4XXX">; 217def isR700 : Predicate<"Subtarget.device()" 218 "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&" 219 "Subtarget.device()->getDeviceFlag()" 220 ">= OCL_DEVICE_RV710">; 221def isEG : Predicate<"Subtarget.device()" 222 "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX && " 223 "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">; 224def isCayman : Predicate<"Subtarget.device()" 225 "->getDeviceFlag() == OCL_DEVICE_CAYMAN">; 226def isEGorCayman : Predicate<"Subtarget.device()" 227 "->getGeneration() == AMDGPUDeviceInfo::HD5XXX" 228 "|| Subtarget.device()->getGeneration() ==" 229 "AMDGPUDeviceInfo::HD6XXX">; 230 231def isR600toCayman : Predicate< 232 "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">; 233 234 235let Predicates = [isR600toCayman] in { 236 237//===----------------------------------------------------------------------===// 238// Common Instructions R600, R700, Evergreen, Cayman 239//===----------------------------------------------------------------------===// 240 241def ADD : R600_2OP < 242 0x0, "ADD", 243 [(set R600_Reg32:$dst, (fadd R600_Reg32:$src0, R600_Reg32:$src1))] 244>; 245 246// Non-IEEE MUL: 0 * anything = 0 247def MUL : R600_2OP < 248 0x1, "MUL NON-IEEE", 249 [(set R600_Reg32:$dst, (int_AMDGPU_mul R600_Reg32:$src0, R600_Reg32:$src1))] 250>; 251 252def MUL_IEEE : R600_2OP < 253 0x2, "MUL_IEEE", 254 [(set R600_Reg32:$dst, (fmul R600_Reg32:$src0, R600_Reg32:$src1))] 255>; 256 257def MAX : R600_2OP < 258 0x3, "MAX", 259 [(set R600_Reg32:$dst, (AMDGPUfmax R600_Reg32:$src0, R600_Reg32:$src1))] 260>; 261 262def MIN : R600_2OP < 263 0x4, "MIN", 264 [(set R600_Reg32:$dst, (AMDGPUfmin R600_Reg32:$src0, R600_Reg32:$src1))] 265>; 266 267// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td, 268// so some of the instruction names don't match the asm string. 269// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics. 270 271def SETE : R600_2OP < 272 0x08, "SETE", 273 [(set R600_Reg32:$dst, 274 (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, 275 COND_EQ))] 276>; 277 278def SGT : R600_2OP < 279 0x09, "SETGT", 280 [(set R600_Reg32:$dst, 281 (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, 282 COND_GT))] 283>; 284 285def SGE : R600_2OP < 286 0xA, "SETGE", 287 [(set R600_Reg32:$dst, 288 (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, 289 COND_GE))] 290>; 291 292def SNE : R600_2OP < 293 0xB, "SETNE", 294 [(set R600_Reg32:$dst, 295 (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, 296 COND_NE))] 297>; 298 299def FRACT : R600_1OP < 300 0x10, "FRACT", 301 [(set R600_Reg32:$dst, (AMDGPUfract R600_Reg32:$src))] 302>; 303 304def TRUNC : R600_1OP < 305 0x11, "TRUNC", 306 [(set R600_Reg32:$dst, (int_AMDGPU_trunc R600_Reg32:$src))] 307>; 308 309def CEIL : R600_1OP < 310 0x12, "CEIL", 311 [(set R600_Reg32:$dst, (fceil R600_Reg32:$src))] 312>; 313 314def RNDNE : R600_1OP < 315 0x13, "RNDNE", 316 [(set R600_Reg32:$dst, (frint R600_Reg32:$src))] 317>; 318 319def FLOOR : R600_1OP < 320 0x14, "FLOOR", 321 [(set R600_Reg32:$dst, (int_AMDGPU_floor R600_Reg32:$src))] 322>; 323 324def MOV : InstR600 <0x19, (outs R600_Reg32:$dst), 325 (ins R600_Reg32:$src0, i32imm:$flags, 326 R600_Pred:$p), 327 "MOV $dst, $src0", [], AnyALU> { 328 let FlagOperandIdx = 2; 329} 330 331class MOV_IMM <ValueType vt, Operand immType> : InstR600 <0x19, 332 (outs R600_Reg32:$dst), 333 (ins R600_Reg32:$alu_literal, R600_Pred:$p, immType:$imm), 334 "MOV_IMM $dst, $imm", 335 [], AnyALU 336>; 337 338def MOV_IMM_I32 : MOV_IMM<i32, i32imm>; 339def : Pat < 340 (imm:$val), 341 (MOV_IMM_I32 (i32 ALU_LITERAL_X), imm:$val) 342>; 343 344def MOV_IMM_F32 : MOV_IMM<f32, f32imm>; 345def : Pat < 346 (fpimm:$val), 347 (MOV_IMM_F32 (i32 ALU_LITERAL_X), fpimm:$val) 348>; 349 350def KILLGT : InstR600 <0x2D, 351 (outs R600_Reg32:$dst), 352 (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags, R600_Pred:$p, 353 variable_ops), 354 "KILLGT $dst, $src0, $src1, $flags ($p)", 355 [], 356 NullALU>{ 357 let FlagOperandIdx = 3; 358} 359 360def AND_INT : R600_2OP < 361 0x30, "AND_INT", 362 [(set R600_Reg32:$dst, (and R600_Reg32:$src0, R600_Reg32:$src1))] 363>; 364 365def OR_INT : R600_2OP < 366 0x31, "OR_INT", 367 [(set R600_Reg32:$dst, (or R600_Reg32:$src0, R600_Reg32:$src1))] 368>; 369 370def XOR_INT : R600_2OP < 371 0x32, "XOR_INT", 372 [(set R600_Reg32:$dst, (xor R600_Reg32:$src0, R600_Reg32:$src1))] 373>; 374 375def NOT_INT : R600_1OP < 376 0x33, "NOT_INT", 377 [(set R600_Reg32:$dst, (not R600_Reg32:$src))] 378>; 379 380def ADD_INT : R600_2OP < 381 0x34, "ADD_INT", 382 [(set R600_Reg32:$dst, (add R600_Reg32:$src0, R600_Reg32:$src1))] 383>; 384 385def SUB_INT : R600_2OP < 386 0x35, "SUB_INT", 387 [(set R600_Reg32:$dst, (sub R600_Reg32:$src0, R600_Reg32:$src1))] 388>; 389 390def MAX_INT : R600_2OP < 391 0x36, "MAX_INT", 392 [(set R600_Reg32:$dst, (AMDGPUsmax R600_Reg32:$src0, R600_Reg32:$src1))]>; 393 394def MIN_INT : R600_2OP < 395 0x37, "MIN_INT", 396 [(set R600_Reg32:$dst, (AMDGPUsmin R600_Reg32:$src0, R600_Reg32:$src1))]>; 397 398def MAX_UINT : R600_2OP < 399 0x38, "MAX_UINT", 400 [(set R600_Reg32:$dst, (AMDGPUsmax R600_Reg32:$src0, R600_Reg32:$src1))] 401>; 402 403def MIN_UINT : R600_2OP < 404 0x39, "MIN_UINT", 405 [(set R600_Reg32:$dst, (AMDGPUumin R600_Reg32:$src0, R600_Reg32:$src1))] 406>; 407 408def SETE_INT : R600_2OP < 409 0x3A, "SETE_INT", 410 [(set (i32 R600_Reg32:$dst), 411 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))] 412>; 413 414def SETGT_INT : R600_2OP < 415 0x3B, "SGT_INT", 416 [(set (i32 R600_Reg32:$dst), 417 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))] 418>; 419 420def SETGE_INT : R600_2OP < 421 0x3C, "SETGE_INT", 422 [(set (i32 R600_Reg32:$dst), 423 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))] 424>; 425 426def SETNE_INT : R600_2OP < 427 0x3D, "SETNE_INT", 428 [(set (i32 R600_Reg32:$dst), 429 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))] 430>; 431 432def SETGT_UINT : R600_2OP < 433 0x3E, "SETGT_UINT", 434 [(set (i32 R600_Reg32:$dst), 435 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))] 436>; 437 438def SETGE_UINT : R600_2OP < 439 0x3F, "SETGE_UINT", 440 [(set (i32 R600_Reg32:$dst), 441 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))] 442>; 443 444def CNDE_INT : R600_3OP < 445 0x1C, "CNDE_INT", 446 [(set (i32 R600_Reg32:$dst), 447 (select R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))] 448>; 449 450//===----------------------------------------------------------------------===// 451// Texture instructions 452//===----------------------------------------------------------------------===// 453 454def TEX_LD : R600_TEX < 455 0x03, "TEX_LD", 456 [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$src4, imm:$src5))] 457> { 458let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $src4, $src5"; 459let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5); 460} 461 462def TEX_GET_TEXTURE_RESINFO : R600_TEX < 463 0x04, "TEX_GET_TEXTURE_RESINFO", 464 [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$src1, imm:$src2))] 465>; 466 467def TEX_GET_GRADIENTS_H : R600_TEX < 468 0x07, "TEX_GET_GRADIENTS_H", 469 [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$src1, imm:$src2))] 470>; 471 472def TEX_GET_GRADIENTS_V : R600_TEX < 473 0x08, "TEX_GET_GRADIENTS_V", 474 [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$src1, imm:$src2))] 475>; 476 477def TEX_SET_GRADIENTS_H : R600_TEX < 478 0x0B, "TEX_SET_GRADIENTS_H", 479 [] 480>; 481 482def TEX_SET_GRADIENTS_V : R600_TEX < 483 0x0C, "TEX_SET_GRADIENTS_V", 484 [] 485>; 486 487def TEX_SAMPLE : R600_TEX < 488 0x10, "TEX_SAMPLE", 489 [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, imm:$src2))] 490>; 491 492def TEX_SAMPLE_C : R600_TEX < 493 0x18, "TEX_SAMPLE_C", 494 [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))] 495>; 496 497def TEX_SAMPLE_L : R600_TEX < 498 0x11, "TEX_SAMPLE_L", 499 [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, imm:$src2))] 500>; 501 502def TEX_SAMPLE_C_L : R600_TEX < 503 0x19, "TEX_SAMPLE_C_L", 504 [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))] 505>; 506 507def TEX_SAMPLE_LB : R600_TEX < 508 0x12, "TEX_SAMPLE_LB", 509 [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, imm:$src2))] 510>; 511 512def TEX_SAMPLE_C_LB : R600_TEX < 513 0x1A, "TEX_SAMPLE_C_LB", 514 [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))] 515>; 516 517def TEX_SAMPLE_G : R600_TEX < 518 0x14, "TEX_SAMPLE_G", 519 [] 520>; 521 522def TEX_SAMPLE_C_G : R600_TEX < 523 0x1C, "TEX_SAMPLE_C_G", 524 [] 525>; 526 527//===----------------------------------------------------------------------===// 528// Helper classes for common instructions 529//===----------------------------------------------------------------------===// 530 531class MUL_LIT_Common <bits<32> inst> : R600_3OP < 532 inst, "MUL_LIT", 533 [] 534>; 535 536class MULADD_Common <bits<32> inst> : R600_3OP < 537 inst, "MULADD", 538 [(set (f32 R600_Reg32:$dst), 539 (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))] 540>; 541 542class CNDE_Common <bits<32> inst> : R600_3OP < 543 inst, "CNDE", 544 [(set (f32 R600_Reg32:$dst), 545 (select (i32 (fp_to_sint (fneg R600_Reg32:$src0))), (f32 R600_Reg32:$src2), (f32 R600_Reg32:$src1)))] 546>; 547 548class CNDGT_Common <bits<32> inst> : R600_3OP < 549 inst, "CNDGT", 550 [] 551>; 552 553class CNDGE_Common <bits<32> inst> : R600_3OP < 554 inst, "CNDGE", 555 [(set R600_Reg32:$dst, (int_AMDGPU_cndlt R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))] 556>; 557 558class DOT4_Common <bits<32> inst> : R600_REDUCTION < 559 inst, 560 (ins R600_Reg128:$src0, R600_Reg128:$src1, i32imm:$flags), 561 "DOT4 $dst $src0, $src1", 562 [] 563 > { 564 let FlagOperandIdx = 3; 565} 566 567class DOT4_Pat <Instruction dot4> : Pat < 568 (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1), 569 (dot4 R600_Reg128:$src0, R600_Reg128:$src1, 0) 570>; 571 572multiclass CUBE_Common <bits<32> inst> { 573 574 def _pseudo : InstR600 < 575 inst, 576 (outs R600_Reg128:$dst), 577 (ins R600_Reg128:$src), 578 "CUBE $dst $src", 579 [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))], 580 VecALU 581 >; 582 583 def _real : InstR600 < 584 inst, 585 (outs R600_Reg32:$dst), 586 (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags), 587 "CUBE $dst, $src0, $src1", 588 [], VecALU 589 >{ 590 let FlagOperandIdx = 3; 591 } 592} 593 594class EXP_IEEE_Common <bits<32> inst> : R600_1OP < 595 inst, "EXP_IEEE", 596 [(set R600_Reg32:$dst, (fexp2 R600_Reg32:$src))] 597>; 598 599class FLT_TO_INT_Common <bits<32> inst> : R600_1OP < 600 inst, "FLT_TO_INT", 601 [(set R600_Reg32:$dst, (fp_to_sint R600_Reg32:$src))] 602>; 603 604class INT_TO_FLT_Common <bits<32> inst> : R600_1OP < 605 inst, "INT_TO_FLT", 606 [(set R600_Reg32:$dst, (sint_to_fp R600_Reg32:$src))] 607>; 608 609class FLT_TO_UINT_Common <bits<32> inst> : R600_1OP < 610 inst, "FLT_TO_UINT", 611 [(set R600_Reg32:$dst, (fp_to_uint R600_Reg32:$src))] 612>; 613 614class UINT_TO_FLT_Common <bits<32> inst> : R600_1OP < 615 inst, "UINT_TO_FLT", 616 [(set R600_Reg32:$dst, (uint_to_fp R600_Reg32:$src))] 617>; 618 619class LOG_CLAMPED_Common <bits<32> inst> : R600_1OP < 620 inst, "LOG_CLAMPED", 621 [] 622>; 623 624class LOG_IEEE_Common <bits<32> inst> : R600_1OP < 625 inst, "LOG_IEEE", 626 [(set R600_Reg32:$dst, (int_AMDIL_log R600_Reg32:$src))] 627>; 628 629class LSHL_Common <bits<32> inst> : R600_2OP < 630 inst, "LSHL $dst, $src0, $src1", 631 [(set R600_Reg32:$dst, (shl R600_Reg32:$src0, R600_Reg32:$src1))] 632>; 633 634class LSHR_Common <bits<32> inst> : R600_2OP < 635 inst, "LSHR $dst, $src0, $src1", 636 [(set R600_Reg32:$dst, (srl R600_Reg32:$src0, R600_Reg32:$src1))] 637>; 638 639class ASHR_Common <bits<32> inst> : R600_2OP < 640 inst, "ASHR $dst, $src0, $src1", 641 [(set R600_Reg32:$dst, (sra R600_Reg32:$src0, R600_Reg32:$src1))] 642>; 643 644class MULHI_INT_Common <bits<32> inst> : R600_2OP < 645 inst, "MULHI_INT $dst, $src0, $src1", 646 [(set R600_Reg32:$dst, (mulhs R600_Reg32:$src0, R600_Reg32:$src1))] 647>; 648 649class MULHI_UINT_Common <bits<32> inst> : R600_2OP < 650 inst, "MULHI $dst, $src0, $src1", 651 [(set R600_Reg32:$dst, (mulhu R600_Reg32:$src0, R600_Reg32:$src1))] 652>; 653 654class MULLO_INT_Common <bits<32> inst> : R600_2OP < 655 inst, "MULLO_INT $dst, $src0, $src1", 656 [(set R600_Reg32:$dst, (mul R600_Reg32:$src0, R600_Reg32:$src1))] 657>; 658 659class MULLO_UINT_Common <bits<32> inst> : R600_2OP < 660 inst, "MULLO_UINT $dst, $src0, $src1", 661 [] 662>; 663 664class RECIP_CLAMPED_Common <bits<32> inst> : R600_1OP < 665 inst, "RECIP_CLAMPED", 666 [] 667>; 668 669class RECIP_IEEE_Common <bits<32> inst> : R600_1OP < 670 inst, "RECIP_IEEE", 671 [(set R600_Reg32:$dst, (int_AMDGPU_rcp R600_Reg32:$src))] 672>; 673 674class RECIP_UINT_Common <bits<32> inst> : R600_1OP < 675 inst, "RECIP_INT $dst, $src", 676 [(set R600_Reg32:$dst, (AMDGPUurecip R600_Reg32:$src))] 677>; 678 679class RECIPSQRT_CLAMPED_Common <bits<32> inst> : R600_1OP < 680 inst, "RECIPSQRT_CLAMPED", 681 [(set R600_Reg32:$dst, (int_AMDGPU_rsq R600_Reg32:$src))] 682>; 683 684class RECIPSQRT_IEEE_Common <bits<32> inst> : R600_1OP < 685 inst, "RECIPSQRT_IEEE", 686 [] 687>; 688 689class SIN_Common <bits<32> inst> : R600_1OP < 690 inst, "SIN", []>{ 691 let Trig = 1; 692} 693 694class COS_Common <bits<32> inst> : R600_1OP < 695 inst, "COS", []> { 696 let Trig = 1; 697} 698 699//===----------------------------------------------------------------------===// 700// Helper patterns for complex intrinsics 701//===----------------------------------------------------------------------===// 702 703class DIV_Common <InstR600 recip_ieee> : Pat< 704 (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1), 705 (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) 706>; 707 708class SSG_Common <InstR600 cndgt, InstR600 cndge> : Pat < 709 (int_AMDGPU_ssg R600_Reg32:$src), 710 (cndgt R600_Reg32:$src, (f32 ONE), (cndge R600_Reg32:$src, (f32 ZERO), (f32 NEG_ONE))) 711>; 712 713class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat < 714 (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w), 715 (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x)) 716>; 717 718//===----------------------------------------------------------------------===// 719// R600 / R700 Instructions 720//===----------------------------------------------------------------------===// 721 722let Predicates = [isR600] in { 723 724 def MUL_LIT_r600 : MUL_LIT_Common<0x0C>; 725 def MULADD_r600 : MULADD_Common<0x10>; 726 def CNDE_r600 : CNDE_Common<0x18>; 727 def CNDGT_r600 : CNDGT_Common<0x19>; 728 def CNDGE_r600 : CNDGE_Common<0x1A>; 729 def DOT4_r600 : DOT4_Common<0x50>; 730 def : DOT4_Pat <DOT4_r600>; 731 defm CUBE_r600 : CUBE_Common<0x52>; 732 def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>; 733 def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>; 734 def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>; 735 def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>; 736 def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>; 737 def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>; 738 def RECIPSQRT_IEEE_r600 : RECIPSQRT_IEEE_Common<0x69>; 739 def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>; 740 def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>; 741 def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>; 742 def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>; 743 def SIN_r600 : SIN_Common<0x6E>; 744 def COS_r600 : COS_Common<0x6F>; 745 def ASHR_r600 : ASHR_Common<0x70>; 746 def LSHR_r600 : LSHR_Common<0x71>; 747 def LSHL_r600 : LSHL_Common<0x72>; 748 def MULLO_INT_r600 : MULLO_INT_Common<0x73>; 749 def MULHI_INT_r600 : MULHI_INT_Common<0x74>; 750 def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>; 751 def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>; 752 def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>; 753 754 def DIV_r600 : DIV_Common<RECIP_IEEE_r600>; 755 def POW_r600 : POW_Common<LOG_IEEE_r600, EXP_IEEE_r600, MUL, GPRF32>; 756 def SSG_r600 : SSG_Common<CNDGT_r600, CNDGE_r600>; 757 def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>; 758 759} 760 761// Helper pattern for normalizing inputs to triginomic instructions for R700+ 762// cards. 763class TRIG_eg <InstR600 trig, Intrinsic intr> : Pat< 764 (intr R600_Reg32:$src), 765 (trig (MUL (MOV_IMM_I32 (i32 ALU_LITERAL_X), CONST.TWO_PI_INV), R600_Reg32:$src)) 766>; 767 768//===----------------------------------------------------------------------===// 769// R700 Only instructions 770//===----------------------------------------------------------------------===// 771 772let Predicates = [isR700] in { 773 def SIN_r700 : SIN_Common<0x6E>; 774 def COS_r700 : COS_Common<0x6F>; 775 776 // R700 normalizes inputs to SIN/COS the same as EG 777 def : TRIG_eg <SIN_r700, int_AMDGPU_sin>; 778 def : TRIG_eg <COS_r700, int_AMDGPU_cos>; 779} 780 781//===----------------------------------------------------------------------===// 782// Evergreen Only instructions 783//===----------------------------------------------------------------------===// 784 785let Predicates = [isEG] in { 786 787def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>; 788 789def MULLO_INT_eg : MULLO_INT_Common<0x8F>; 790def MULHI_INT_eg : MULHI_INT_Common<0x90>; 791def MULLO_UINT_eg : MULLO_UINT_Common<0x91>; 792def MULHI_UINT_eg : MULHI_UINT_Common<0x92>; 793def RECIP_UINT_eg : RECIP_UINT_Common<0x94>; 794 795} // End Predicates = [isEG] 796 797//===----------------------------------------------------------------------===// 798// Evergreen / Cayman Instructions 799//===----------------------------------------------------------------------===// 800 801let Predicates = [isEGorCayman] in { 802 803 // BFE_UINT - bit_extract, an optimization for mask and shift 804 // Src0 = Input 805 // Src1 = Offset 806 // Src2 = Width 807 // 808 // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width) 809 // 810 // Example Usage: 811 // (Offset, Width) 812 // 813 // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0 814 // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8 815 // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16 816 // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24 817 def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT", 818 [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0, 819 R600_Reg32:$src1, 820 R600_Reg32:$src2))], 821 VecALU 822 >; 823 824 def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", 825 [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1, 826 R600_Reg32:$src2))], 827 VecALU 828 >; 829 830 def MULADD_eg : MULADD_Common<0x14>; 831 def ASHR_eg : ASHR_Common<0x15>; 832 def LSHR_eg : LSHR_Common<0x16>; 833 def LSHL_eg : LSHL_Common<0x17>; 834 def CNDE_eg : CNDE_Common<0x19>; 835 def CNDGT_eg : CNDGT_Common<0x1A>; 836 def CNDGE_eg : CNDGE_Common<0x1B>; 837 def MUL_LIT_eg : MUL_LIT_Common<0x1F>; 838 def EXP_IEEE_eg : EXP_IEEE_Common<0x81>; 839 def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>; 840 def LOG_IEEE_eg : LOG_IEEE_Common<0x83>; 841 def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; 842 def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>; 843 def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; 844 def SIN_eg : SIN_Common<0x8D>; 845 def COS_eg : COS_Common<0x8E>; 846 def DOT4_eg : DOT4_Common<0xBE>; 847 def : DOT4_Pat <DOT4_eg>; 848 defm CUBE_eg : CUBE_Common<0xC0>; 849 850 def DIV_eg : DIV_Common<RECIP_IEEE_eg>; 851 def POW_eg : POW_Common<LOG_IEEE_eg, EXP_IEEE_eg, MUL, GPRF32>; 852 def SSG_eg : SSG_Common<CNDGT_eg, CNDGE_eg>; 853 def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common<MUL_LIT_eg, LOG_CLAMPED_eg, EXP_IEEE_eg>; 854 855 def : TRIG_eg <SIN_eg, int_AMDGPU_sin>; 856 def : TRIG_eg <COS_eg, int_AMDGPU_cos>; 857 858 def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { 859 let Pattern = []; 860 } 861 862 def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; 863 864 def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> { 865 let Pattern = []; 866 } 867 868 def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>; 869 870 def : Pat<(fp_to_sint R600_Reg32:$src), 871 (FLT_TO_INT_eg (TRUNC R600_Reg32:$src))>; 872 873 def : Pat<(fp_to_uint R600_Reg32:$src), 874 (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src))>; 875 876//===----------------------------------------------------------------------===// 877// Memory read/write instructions 878//===----------------------------------------------------------------------===// 879 880let usesCustomInserter = 1 in { 881 882def RAT_WRITE_CACHELESS_eg : EG_CF_RAT <0x57, 0x2, 0, (outs), 883 (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$eop), 884 "RAT_WRITE_CACHELESS_eg $rw_gpr, $index_gpr, $eop", 885 []> 886{ 887 let RIM = 0; 888 // XXX: Have a separate instruction for non-indexed writes. 889 let TYPE = 1; 890 let RW_REL = 0; 891 let ELEM_SIZE = 0; 892 893 let ARRAY_SIZE = 0; 894 let COMP_MASK = 1; 895 let BURST_COUNT = 0; 896 let VPM = 0; 897 let MARK = 0; 898 let BARRIER = 1; 899} 900 901} // End usesCustomInserter = 1 902 903// i32 global_store 904def : Pat < 905 (global_store (i32 R600_TReg32_X:$val), R600_TReg32_X:$ptr), 906 (RAT_WRITE_CACHELESS_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0) 907>; 908 909// Floating point global_store 910def : Pat < 911 (global_store (f32 R600_TReg32_X:$val), R600_TReg32_X:$ptr), 912 (RAT_WRITE_CACHELESS_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr, 0) 913>; 914 915class VTX_READ_eg <bits<8> buffer_id, dag outs, list<dag> pattern> 916 : InstR600ISA <outs, (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr", pattern> { 917 918 // Operands 919 bits<7> DST_GPR; 920 bits<7> SRC_GPR; 921 922 // Static fields 923 bits<5> VC_INST = 0; 924 bits<2> FETCH_TYPE = 2; 925 bits<1> FETCH_WHOLE_QUAD = 0; 926 bits<8> BUFFER_ID = buffer_id; 927 bits<1> SRC_REL = 0; 928 // XXX: We can infer this field based on the SRC_GPR. This would allow us 929 // to store vertex addresses in any channel, not just X. 930 bits<2> SRC_SEL_X = 0; 931 bits<6> MEGA_FETCH_COUNT; 932 bits<1> DST_REL = 0; 933 bits<3> DST_SEL_X; 934 bits<3> DST_SEL_Y; 935 bits<3> DST_SEL_Z; 936 bits<3> DST_SEL_W; 937 // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, 938 // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, 939 // however, based on my testing if USE_CONST_FIELDS is set, then all 940 // these fields need to be set to 0. 941 bits<1> USE_CONST_FIELDS = 0; 942 bits<6> DATA_FORMAT; 943 bits<2> NUM_FORMAT_ALL = 1; 944 bits<1> FORMAT_COMP_ALL = 0; 945 bits<1> SRF_MODE_ALL = 0; 946 947 // LLVM can only encode 64-bit instructions, so these fields are manually 948 // encoded in R600CodeEmitter 949 // 950 // bits<16> OFFSET; 951 // bits<2> ENDIAN_SWAP = 0; 952 // bits<1> CONST_BUF_NO_STRIDE = 0; 953 // bits<1> MEGA_FETCH = 0; 954 // bits<1> ALT_CONST = 0; 955 // bits<2> BUFFER_INDEX_MODE = 0; 956 957 // VTX_WORD0 958 let Inst{4-0} = VC_INST; 959 let Inst{6-5} = FETCH_TYPE; 960 let Inst{7} = FETCH_WHOLE_QUAD; 961 let Inst{15-8} = BUFFER_ID; 962 let Inst{22-16} = SRC_GPR; 963 let Inst{23} = SRC_REL; 964 let Inst{25-24} = SRC_SEL_X; 965 let Inst{31-26} = MEGA_FETCH_COUNT; 966 967 // VTX_WORD1_GPR 968 let Inst{38-32} = DST_GPR; 969 let Inst{39} = DST_REL; 970 let Inst{40} = 0; // Reserved 971 let Inst{43-41} = DST_SEL_X; 972 let Inst{46-44} = DST_SEL_Y; 973 let Inst{49-47} = DST_SEL_Z; 974 let Inst{52-50} = DST_SEL_W; 975 let Inst{53} = USE_CONST_FIELDS; 976 let Inst{59-54} = DATA_FORMAT; 977 let Inst{61-60} = NUM_FORMAT_ALL; 978 let Inst{62} = FORMAT_COMP_ALL; 979 let Inst{63} = SRF_MODE_ALL; 980 981 // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding 982 // is done in R600CodeEmitter 983 // 984 // Inst{79-64} = OFFSET; 985 // Inst{81-80} = ENDIAN_SWAP; 986 // Inst{82} = CONST_BUF_NO_STRIDE; 987 // Inst{83} = MEGA_FETCH; 988 // Inst{84} = ALT_CONST; 989 // Inst{86-85} = BUFFER_INDEX_MODE; 990 // Inst{95-86} = 0; Reserved 991 992 // VTX_WORD3 (Padding) 993 // 994 // Inst{127-96} = 0; 995} 996 997class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern> 998 : VTX_READ_eg <buffer_id, (outs R600_TReg32_X:$dst), pattern> { 999 1000 let MEGA_FETCH_COUNT = 4; 1001 let DST_SEL_X = 0; 1002 let DST_SEL_Y = 7; // Masked 1003 let DST_SEL_Z = 7; // Masked 1004 let DST_SEL_W = 7; // Masked 1005 let DATA_FORMAT = 0xD; // COLOR_32 1006 1007 // This is not really necessary, but there were some GPU hangs that appeared 1008 // to be caused by ALU instructions in the next instruction group that wrote 1009 // to the $ptr registers of the VTX_READ. 1010 // e.g. 1011 // %T3_X<def> = VTX_READ_PARAM_i32_eg %T2_X<kill>, 24 1012 // %T2_X<def> = MOV %ZERO 1013 //Adding this constraint prevents this from happening. 1014 let Constraints = "$ptr.ptr = $dst"; 1015} 1016 1017class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern> 1018 : VTX_READ_eg <buffer_id, (outs R600_Reg128:$dst), pattern> { 1019 1020 let MEGA_FETCH_COUNT = 16; 1021 let DST_SEL_X = 0; 1022 let DST_SEL_Y = 1; 1023 let DST_SEL_Z = 2; 1024 let DST_SEL_W = 3; 1025 let DATA_FORMAT = 0x22; // COLOR_32_32_32_32 1026 1027 // XXX: Need to force VTX_READ_128 instructions to write to the same register 1028 // that holds its buffer address to avoid potential hangs. We can't use 1029 // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst 1030 // registers are different sizes. 1031} 1032 1033//===----------------------------------------------------------------------===// 1034// VTX Read from parameter memory space 1035//===----------------------------------------------------------------------===// 1036 1037class VTX_READ_PARAM_32_eg <ValueType vt> : VTX_READ_32_eg <0, 1038 [(set (vt R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))] 1039>; 1040 1041def VTX_READ_PARAM_i32_eg : VTX_READ_PARAM_32_eg<i32>; 1042def VTX_READ_PARAM_f32_eg : VTX_READ_PARAM_32_eg<f32>; 1043 1044 1045//===----------------------------------------------------------------------===// 1046// VTX Read from global memory space 1047//===----------------------------------------------------------------------===// 1048 1049// 32-bit reads 1050 1051class VTX_READ_GLOBAL_eg <ValueType vt> : VTX_READ_32_eg <1, 1052 [(set (vt R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))] 1053>; 1054 1055def VTX_READ_GLOBAL_i32_eg : VTX_READ_GLOBAL_eg<i32>; 1056def VTX_READ_GLOBAL_f32_eg : VTX_READ_GLOBAL_eg<f32>; 1057 1058// 128-bit reads 1059 1060class VTX_READ_GLOBAL_128_eg <ValueType vt> : VTX_READ_128_eg <1, 1061 [(set (vt R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))] 1062>; 1063 1064def VTX_READ_GLOBAL_v4i32_eg : VTX_READ_GLOBAL_128_eg<v4i32>; 1065def VTX_READ_GLOBAL_v4f32_eg : VTX_READ_GLOBAL_128_eg<v4f32>; 1066 1067} 1068 1069let Predicates = [isCayman] in { 1070 1071let isVector = 1 in { 1072 1073def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; 1074 1075def MULLO_INT_cm : MULLO_INT_Common<0x8F>; 1076def MULHI_INT_cm : MULHI_INT_Common<0x90>; 1077def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; 1078def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; 1079 1080} // End isVector = 1 1081 1082// RECIP_UINT emulation for Cayman 1083def : Pat < 1084 (AMDGPUurecip R600_Reg32:$src0), 1085 (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)), 1086 (MOV_IMM_I32 (i32 ALU_LITERAL_X), 0x4f800000))) 1087>; 1088 1089} // End isCayman 1090 1091let isCodeGenOnly = 1 in { 1092 1093 def MULLIT : AMDGPUShaderInst < 1094 (outs R600_Reg128:$dst), 1095 (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2), 1096 "MULLIT $dst, $src0, $src1", 1097 [(set R600_Reg128:$dst, (int_AMDGPU_mullit R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))] 1098 >; 1099 1100let usesCustomInserter = 1, isPseudo = 1 in { 1101 1102class R600PreloadInst <string asm, Intrinsic intr> : AMDGPUInst < 1103 (outs R600_TReg32:$dst), 1104 (ins), 1105 asm, 1106 [(set R600_TReg32:$dst, (intr))] 1107>; 1108 1109def R600_LOAD_CONST : AMDGPUShaderInst < 1110 (outs R600_Reg32:$dst), 1111 (ins i32imm:$src0), 1112 "R600_LOAD_CONST $dst, $src0", 1113 [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))] 1114>; 1115 1116def RESERVE_REG : AMDGPUShaderInst < 1117 (outs), 1118 (ins i32imm:$src), 1119 "RESERVE_REG $src", 1120 [(int_AMDGPU_reserve_reg imm:$src)] 1121>; 1122 1123def TXD: AMDGPUShaderInst < 1124 (outs R600_Reg128:$dst), 1125 (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4), 1126 "TXD $dst, $src0, $src1, $src2, $src3, $src4", 1127 [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, imm:$src4))] 1128>; 1129 1130def TXD_SHADOW: AMDGPUShaderInst < 1131 (outs R600_Reg128:$dst), 1132 (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4), 1133 "TXD_SHADOW $dst, $src0, $src1, $src2, $src3, $src4", 1134 [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, TEX_SHADOW:$src4))] 1135>; 1136 1137} // End usesCustomInserter = 1, isPseudo = 1 1138 1139} // End isCodeGenOnly = 1 1140 1141def CLAMP_R600 : CLAMP <R600_Reg32>; 1142def FABS_R600 : FABS<R600_Reg32>; 1143def FNEG_R600 : FNEG<R600_Reg32>; 1144 1145let usesCustomInserter = 1 in { 1146 1147def MASK_WRITE : AMDGPUShaderInst < 1148 (outs), 1149 (ins R600_Reg32:$src), 1150 "MASK_WRITE $src", 1151 [] 1152>; 1153 1154} // End usesCustomInserter = 1 1155 1156//===---------------------------------------------------------------------===// 1157// Return instruction 1158//===---------------------------------------------------------------------===// 1159let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in { 1160 def RETURN : ILFormat<(outs), (ins variable_ops), 1161 "RETURN", [(IL_retflag)]>; 1162} 1163 1164//===----------------------------------------------------------------------===// 1165// ISel Patterns 1166//===----------------------------------------------------------------------===// 1167 1168// KIL Patterns 1169def KILP : Pat < 1170 (int_AMDGPU_kilp), 1171 (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO), 0)) 1172>; 1173 1174def KIL : Pat < 1175 (int_AMDGPU_kill R600_Reg32:$src0), 1176 (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0), 0)) 1177>; 1178 1179// SGT Reverse args 1180def : Pat < 1181 (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT), 1182 (SGT R600_Reg32:$src1, R600_Reg32:$src0) 1183>; 1184 1185// SGE Reverse args 1186def : Pat < 1187 (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE), 1188 (SGE R600_Reg32:$src1, R600_Reg32:$src0) 1189>; 1190 1191// SETGT_INT reverse args 1192def : Pat < 1193 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT), 1194 (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0) 1195>; 1196 1197// SETGE_INT reverse args 1198def : Pat < 1199 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE), 1200 (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0) 1201>; 1202 1203// SETGT_UINT reverse args 1204def : Pat < 1205 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT), 1206 (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0) 1207>; 1208 1209// SETGE_UINT reverse args 1210def : Pat < 1211 (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE), 1212 (SETGE_UINT R600_Reg32:$src0, R600_Reg32:$src1) 1213>; 1214 1215// The next two patterns are special cases for handling 'true if ordered' and 1216// 'true if unordered' conditionals. The assumption here is that the behavior of 1217// SETE and SNE conforms to the Direct3D 10 rules for floating point values 1218// described here: 1219// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit 1220// We assume that SETE returns false when one of the operands is NAN and 1221// SNE returns true when on of the operands is NAN 1222 1223//SETE - 'true if ordered' 1224def : Pat < 1225 (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO), 1226 (SETE R600_Reg32:$src0, R600_Reg32:$src1) 1227>; 1228 1229//SNE - 'true if unordered' 1230def : Pat < 1231 (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO), 1232 (SNE R600_Reg32:$src0, R600_Reg32:$src1) 1233>; 1234 1235def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>; 1236def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>; 1237def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>; 1238def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>; 1239 1240def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 4, sel_x>; 1241def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 5, sel_y>; 1242def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 6, sel_z>; 1243def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 7, sel_w>; 1244 1245def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>; 1246def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>; 1247def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>; 1248def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>; 1249 1250def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 4, sel_x>; 1251def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 5, sel_y>; 1252def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 6, sel_z>; 1253def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 7, sel_w>; 1254 1255def : Vector_Build <v4f32, R600_Reg32>; 1256def : Vector_Build <v4i32, R600_Reg32>; 1257 1258// bitconvert patterns 1259 1260def : BitConvert <i32, f32, R600_Reg32>; 1261def : BitConvert <f32, i32, R600_Reg32>; 1262def : BitConvert <v4f32, v4i32, R600_Reg128>; 1263 1264} // End isR600toCayman Predicate 1265