//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains DAG node definitions for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AMDGPU DAG Profiles
//===----------------------------------------------------------------------===//

// Constraint indices in an SDTypeProfile number the results first and the
// operands after them, so in SDTypeProfile<1, 3, ...> index 0 is the result
// and indices 1-3 are the operands.

// Integer ternary op: the result and the first two operands share one integer
// type; the third operand is an integer of a possibly different type.
def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
]>;

// FP result, same-typed FP first operand, integer second operand.
def AMDGPUTrigPreOp : SDTypeProfile<1, 2,
  [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
>;

// FP result, same-typed FP first operand, integer second operand.
def AMDGPULdExpOp : SDTypeProfile<1, 2,
  [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
>;

// Integer result computed from an FP operand and an integer operand.
def AMDGPUFPClassOp : SDTypeProfile<1, 2,
  [SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>]
>;

// Two FP operands of the same type; the result type is not constrained here.
def AMDGPUFPPackOp : SDTypeProfile<1, 2,
  [SDTCisFP<1>, SDTCisSameAs<1, 2>]
>;

// Two integer operands of the same type; the result type is not constrained
// here.
def AMDGPUIntPackOp : SDTypeProfile<1, 2,
  [SDTCisInt<1>, SDTCisSameAs<1, 2>]
>;

// Two results (an FP value and an integer) and three operands that all have
// the type of the FP result.
def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
  [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
>;

// float, float, float, vcc
def AMDGPUFmasOp : SDTypeProfile<1, 4,
  [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>]
>;

// No result; a single integer operand.
def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;

// Control-flow profiles. The values passed between these nodes are i64; the
// if/else/loop forms additionally take an OtherVT operand.
def AMDGPUIfOp : SDTypeProfile<1, 2,
  [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
>;

def AMDGPUElseOp : SDTypeProfile<1, 2,
  [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, OtherVT>]
>;

def AMDGPULoopOp : SDTypeProfile<0, 2,
  [SDTCisVT<0, i64>, SDTCisVT<1, OtherVT>]
>;

def AMDGPUBreakOp : SDTypeProfile<1, 1,
  [SDTCisVT<0, i64>, SDTCisVT<1, i64>]
>;

def AMDGPUIfBreakOp : SDTypeProfile<1, 2,
  [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, i64>]
>;

def AMDGPUElseBreakOp : SDTypeProfile<1, 2,
  [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, i64>]
>;

// Results: an i32 value and an i1. Operands: two i32 values and an i1.
def AMDGPUAddeSubeOp : SDTypeProfile<2, 3,
  [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisVT<0, i32>, SDTCisVT<1, i1>, SDTCisVT<4, i1>]
>;

// No result; two operands, the first of which has pointer type.
def SDT_AMDGPUTCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;

//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//===----------------------------------------------------------------------===//

def AMDGPUif : SDNode<"AMDGPUISD::IF", AMDGPUIfOp, [SDNPHasChain]>;
def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChain]>;
def AMDGPUloop : SDNode<"AMDGPUISD::LOOP", AMDGPULoopOp, [SDNPHasChain]>;

def callseq_start : SDNode<"ISD::CALLSEQ_START",
  SDCallSeqStart<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>,
  [SDNPHasChain, SDNPOutGlue]
>;

def callseq_end : SDNode<"ISD::CALLSEQ_END",
  SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

// Variadic call node; the first operand has pointer type.
def AMDGPUcall : SDNode<"AMDGPUISD::CALL",
  SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
   SDNPVariadic]
>;

def AMDGPUtc_return: SDNode<"AMDGPUISD::TC_RETURN", SDT_AMDGPUTCRET,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;

// Variadic trap node; the first operand is an i16.
def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP",
  SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>,
  [SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPInGlue]
>;

// NOTE(review): both constraints below name index 0 (the result), so the
// operand (index 1) is left unconstrained. The second constraint may have been
// meant as SDTCisVT<1, iPTR> -- confirm against the users of this node before
// changing it.
def AMDGPUconstdata_ptr : SDNode<
  "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, iPTR>,
                                                     SDTCisVT<0, iPTR>]>
>;

// The argument to this node is a dword address.
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;

// Force dependencies for vector trunc stores
def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>;

def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;

// out = a - floor(a)
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;

// out = 1.0 / a
def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;

// out = 1.0 / sqrt(a)
def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;

// Legacy variants of the two nodes above:
//   rcp_legacy: out = 1.0 / a
//   rsq_legacy: out = 1.0 / sqrt(a)
// NOTE(review): how the legacy forms differ from RCP/RSQ is not described in
// this file; confirm against the ISA documentation.
def AMDGPUrcp_legacy : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>;
def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;

def AMDGPUrcp_iflag : SDNode<"AMDGPUISD::RCP_IFLAG", SDTFPUnaryOp>;

// out = 1.0 / sqrt(a) result clamped to +/- max_float.
def AMDGPUrsq_clamp : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;

def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;

def AMDGPUpkrtz_f16_f32 : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>;
def AMDGPUpknorm_i16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>;
def AMDGPUpknorm_u16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>;
def AMDGPUpk_i16_i32 : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>;
def AMDGPUpk_u16_u32 : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>;
def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16" , SDTFPToIntOp>;
def AMDGPUfp16_zext : SDNode<"AMDGPUISD::FP16_ZEXT" , SDTFPToIntOp>;


def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>;

// out = max(a, b) a and b are floats, where a nan comparison fails.
// This is not commutative because this gives the second operand:
//   x < nan ? x : nan -> nan
//   nan < x ? nan : x -> x
def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp,
  []
>;

def AMDGPUfmul_legacy : SDNode<"AMDGPUISD::FMUL_LEGACY", SDTFPBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// out = min(a, b) a and b are floats, where a nan comparison fails.
def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp,
  []
>;

// FIXME: TableGen doesn't like commutative instructions with more
// than 2 operands.
// out = max(a, b, c) a, b and c are floats
def AMDGPUfmax3 : SDNode<"AMDGPUISD::FMAX3", SDTFPTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = max(a, b, c) a, b, and c are signed ints
def AMDGPUsmax3 : SDNode<"AMDGPUISD::SMAX3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = max(a, b, c) a, b and c are unsigned ints
def AMDGPUumax3 : SDNode<"AMDGPUISD::UMAX3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = min(a, b, c) a, b and c are floats
def AMDGPUfmin3 : SDNode<"AMDGPUISD::FMIN3", SDTFPTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = min(a, b, c) a, b and c are signed ints
def AMDGPUsmin3 : SDNode<"AMDGPUISD::SMIN3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = min(a, b, c) a, b and c are unsigned ints
def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = (src0 + src1 > 0xFFFFFFFF) ? 1 : 0
def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>;

// out = (src1 > src0) ? 1 : 0
def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;

// TODO: remove AMDGPUadde/AMDGPUsube when ADDCARRY/SUBCARRY get their own
// nodes in TargetSelectionDAG.td.
def AMDGPUadde : SDNode<"ISD::ADDCARRY", AMDGPUAddeSubeOp, []>;

def AMDGPUsube : SDNode<"ISD::SUBCARRY", AMDGPUAddeSubeOp, []>;

// setcc: i64 result, two operands of the same type, and an OtherVT third
// operand.
def AMDGPUSetCCOp : SDTypeProfile<1, 3, [
  SDTCisVT<0, i64>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
]>;

def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;

// No result; two integer operands.
def AMDGPUSetRegOp : SDTypeProfile<0, 2, [
  SDTCisInt<0>, SDTCisInt<1>
]>;

def AMDGPUsetreg : SDNode<"AMDGPUISD::SETREG", AMDGPUSetRegOp, [
  SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>;

// Chained (and optionally glued) forms of FP fma / mul.
def AMDGPUfma : SDNode<"AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp, [
  SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;

def AMDGPUmul : SDNode<"AMDGPUISD::FMUL_W_CHAIN", SDTFPBinOp, [
  SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;

def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
  SDTIntToFPOp, []>;
def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1",
  SDTIntToFPOp, []>;
def AMDGPUcvt_f32_ubyte2 : SDNode<"AMDGPUISD::CVT_F32_UBYTE2",
  SDTIntToFPOp, []>;
def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3",
  SDTIntToFPOp, []>;


// urecip - This operation is a helper for integer division, it returns the
// result of 1 / a as a fractional unsigned integer.
// out = (2^32 / a) + e
// e is rounding error
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;

// Special case divide preop and flags.
def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;

// Special case divide FMA with scale and flags (src0 = Quotient,
// src1 = Denominator, src2 = Numerator).
def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>;

// Single or double precision division fixup.
// Special case divide fixup and flags(src0 = Quotient, src1 =
// Denominator, src2 = Numerator).
def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;

def AMDGPUfmad_ftz : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;

// Look Up 2.0 / pi src0 with segment select src1[4:0]
def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;

// One result; the first operand has pointer type and the second is an integer.
def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
                          SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
                          [SDNPHasChain, SDNPMayLoad]>;

// No result, three operands: operand 0 is unconstrained, operand 1 has pointer
// type and operand 2 is an integer.
def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
                           SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
                           [SDNPHasChain, SDNPMayStore]>;

// MSKOR instructions are atomic memory instructions used mainly for storing
// 8-bit and 16-bit values.  The definition is:
//
// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src)
//
// src0: vec4(src, 0, 0, mask)
// src1: dst - rat offset (aka pointer) in dwords
def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
                        SDTypeProfile<0, 2, []>,
                        [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

// One result; the first operand has pointer type and the second is a vector.
def AMDGPUatomic_cmp_swap : SDNode<"AMDGPUISD::ATOMIC_CMP_SWAP",
                            SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisVec<2>]>,
                            [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
                             SDNPMemOperand]>;

// FP unary node over ISD::FROUND: result and operand share one FP type.
def AMDGPUround : SDNode<"ISD::FROUND",
                         SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;

def AMDGPUbfe_u32 : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;

def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>;
def AMDGPUffbh_i32 : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>;

def AMDGPUffbl_b32 : SDNode<"AMDGPUISD::FFBL_B32", SDTIntUnaryOp>;

// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored
// when performing the multiply. The result is a 32-bit value.
def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;
def AMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

def AMDGPUmulhi_u24 : SDNode<"AMDGPUISD::MULHI_U24", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;
def AMDGPUmulhi_i24 : SDNode<"AMDGPUISD::MULHI_I24", SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp,
  []
>;
def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp,
  []
>;

def AMDGPUsmed3 : SDNode<"AMDGPUISD::SMED3", AMDGPUDTIntTernaryOp,
  []
>;

def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp,
  []
>;

def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>;

// FP result with four operands: the first two are vectors of one type, the
// third has the result's type, and the fourth is an integer.
def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2",
                  SDTypeProfile<1, 4, [SDTCisSameAs<0, 3>, SDTCisSameAs<1, 2>,
                                       SDTCisFP<0>, SDTCisVec<1>,
                                       SDTCisInt<4>]>,
                  []>;

def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;

def AMDGPUinit_exec : SDNode<"AMDGPUISD::INIT_EXEC",
                      SDTypeProfile<0, 1, [SDTCisInt<0>]>,
                      [SDNPHasChain, SDNPInGlue]>;

def AMDGPUinit_exec_from_input : SDNode<"AMDGPUISD::INIT_EXEC_FROM_INPUT",
                                 SDTypeProfile<0, 2,
                                 [SDTCisInt<0>, SDTCisInt<1>]>,
                                 [SDNPHasChain, SDNPInGlue]>;

def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
                    SDTypeProfile<0, 1, [SDTCisInt<0>]>,
                    [SDNPHasChain, SDNPInGlue]>;

def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT",
                        SDTypeProfile<0, 1, [SDTCisInt<0>]>,
                        [SDNPHasChain, SDNPInGlue]>;

def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
                       SDTypeProfile<1, 3, [SDTCisFP<0>]>,
                       [SDNPInGlue]>;

def AMDGPUinterp_p1 : SDNode<"AMDGPUISD::INTERP_P1",
                      SDTypeProfile<1, 3, [SDTCisFP<0>]>,
                      [SDNPInGlue, SDNPOutGlue]>;

def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2",
                      SDTypeProfile<1, 4, [SDTCisFP<0>]>,
                      [SDNPInGlue]>;


def AMDGPUkill : SDNode<"AMDGPUISD::KILL", AMDGPUKillSDT,
  [SDNPHasChain, SDNPSideEffect]>;

// SI+ export
// Eight operands, indices 0-7: tgt, en, src0-src3, compr, vm. There is no
// "done" operand in this profile; the profile is shared by the EXPORT and
// EXPORT_DONE nodes below.
def AMDGPUExportOp : SDTypeProfile<0, 8, [
  SDTCisInt<0>,       // i8 tgt
  SDTCisInt<1>,       // i8 en
                      // i32 or f32 src0 (index 2, unconstrained)
  SDTCisSameAs<3, 2>, // f32 src1
  SDTCisSameAs<4, 2>, // f32 src2
  SDTCisSameAs<5, 2>, // f32 src3
  SDTCisInt<6>,       // i1 compr
  // skip done
  // vm is the last operand (index 7). This constraint previously read
  // SDTCisInt<1>, which only repeated the "en" constraint and left vm
  // unconstrained.
  SDTCisInt<7>        // i1 vm
]>;

def AMDGPUexport: SDNode<"AMDGPUISD::EXPORT", AMDGPUExportOp,
  [SDNPHasChain, SDNPMayStore]>;

def AMDGPUexport_done: SDNode<"AMDGPUISD::EXPORT_DONE", AMDGPUExportOp,
  [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;


// No result, seven operands: the first is FP and the second is an integer.
def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;

def R600_EXPORT: SDNode<"AMDGPUISD::R600_EXPORT", R600ExportOp,
  [SDNPHasChain, SDNPSideEffect]>;

//===----------------------------------------------------------------------===//
// Flow Control Profile Types
//===----------------------------------------------------------------------===//
// Branch instruction where second and third are basic blocks
// NOTE(review): the profile below declares only two operands and constrains
// the first to OtherVT, which does not match the wording above -- confirm.
def SDTIL_BRCond : SDTypeProfile<0, 2, [
    SDTCisVT<0, OtherVT>
    ]>;

//===----------------------------------------------------------------------===//
// Flow Control DAG Nodes
//===----------------------------------------------------------------------===//
def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;

//===----------------------------------------------------------------------===//
// Call/Return DAG Nodes
//===----------------------------------------------------------------------===//
def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone,
    [SDNPHasChain, SDNPOptInGlue]>;

def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
    [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

// Variadic return node; the first operand has pointer type.
def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;