//===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===------------------------------------------------------------===//

include "llvm/Target/Target.td"

//===------------------------------------------------------------===//
// Subtarget Features (device properties)
//===------------------------------------------------------------===//

// Every record below passes its SubtargetFeature arguments in this order:
//   1. feature name,
//   2. subtarget attribute the feature sets,
//   3. value assigned to that attribute,
//   4. human-readable description,
//   5. (optional) list of features implied by this one.

def FeatureFP64 : SubtargetFeature<"fp64",
  "FP64",
  "true",
  "Enable double precision operations"
>;

def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
  "FastFMAF32",
  "true",
  "Assuming f32 fma is at least as fast as mul + add"
>;

// NOTE: unlike its neighbours, this def has no "Feature" prefix. The name is
// kept as-is because other files may reference it.
def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
  "HalfRate64Ops",
  "true",
  "Most fp64 instructions are half rate instead of quarter"
>;

// NOTE: enabling this feature assigns "false" (not "true") to R600ALUInst.
def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
  "R600ALUInst",
  "false",
  "Older version of ALU instructions encoding"
>;

def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
  "HasVertexCache",
  "true",
  "Specify use of dedicated vertex cache"
>;

def FeatureCaymanISA : SubtargetFeature<"caymanISA",
  "CaymanISA",
  "true",
  "Use Cayman ISA"
>;

def FeatureCFALUBug : SubtargetFeature<"cfalubug",
  "CFALUBug",
  "true",
  "GPU has CF_ALU bug"
>;

def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
  "FlatAddressSpace",
  "true",
  "Support flat address space"
>;

def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
  "UnalignedBufferAccess",
  "true",
  "Support unaligned global loads and stores"
>;

def FeatureXNACK : SubtargetFeature<"xnack",
  "EnableXNACK",
  "true",
  "Enable XNACK support"
>;

def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
  "SGPRInitBug",
  "true",
  "VI SGPR initialization bug requiring a fixed SGPR allocation size"
>;

// Feature "fetch<Value>": sets TexVTXClauseSize to Value.
class SubtargetFeatureFetchLimit <string Value> :
  SubtargetFeature <"fetch"#Value,
  "TexVTXClauseSize",
  Value,
  "Limit the maximum number of fetches in a clause to "#Value
>;

def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;

// Feature "wavefrontsize<Value>": sets WavefrontSize to Value.
class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
  "wavefrontsize"#Value,
  "WavefrontSize",
  !cast<string>(Value),
  "The number of threads per wavefront"
>;

def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;

// Feature "ldsbankcount<Value>": sets LDSBankCount to Value.
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
  "ldsbankcount"#Value,
  "LDSBankCount",
  !cast<string>(Value),
  "The number of LDS banks per compute unit."
>;

def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;

// Feature "isaver<Major>.<Minor>.<Stepping>": sets IsaVersion to
// ISAVersion<Major>_<Minor>_<Stepping>.
class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping>
                                 : SubtargetFeature <
  "isaver"#Major#"."#Minor#"."#Stepping,
  "IsaVersion",
  "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
  "Instruction set version number"
>;

def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0>;
def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1>;
def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0>;
def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1>;
def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3>;

// Feature "localmemorysize<Value>": sets LocalMemorySize to Value (bytes, per
// the description string).
class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
  "localmemorysize"#Value,
  "LocalMemorySize",
  !cast<string>(Value),
  "The size of local memory in bytes"
>;

def FeatureGCN : SubtargetFeature<"gcn",
  "IsGCN",
  "true",
  "GCN or newer GPU"
>;

def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
  "GCN1Encoding",
  "true",
  "Encoding format for SI and CI"
>;

def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
  "GCN3Encoding",
  "true",
  "Encoding format for VI"
>;

def FeatureCIInsts : SubtargetFeature<"ci-insts",
  "CIInsts",
  "true",
  "Additional instructions for CI+"
>;

def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime",
  "HasSMemRealTime",
  "true",
  "Has s_memrealtime instruction"
>;

def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
  "Has16BitInsts",
  "true",
  "Has i16/f16 instructions"
>;

//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//

// Some instructions do not support denormals despite this flag. Using
// fp32 denormals also causes instructions to run at the double
// precision rate for the device.
def FeatureFP32Denormals
  : SubtargetFeature<"fp32-denormals", "FP32Denormals", "true",
                     "Enable single precision denormal handling">;

// Enabling fp64 denormals also implies FeatureFP64.
def FeatureFP64Denormals
  : SubtargetFeature<"fp64-denormals", "FP64Denormals", "true",
                     "Enable double precision denormal handling",
                     [FeatureFP64]>;

def FeatureFPExceptions
  : SubtargetFeature<"fp-exceptions", "FPExceptions", "true",
                     "Enable floating point exceptions">;

// Feature "max-private-element-size-<size>": sets MaxPrivateElementSize to
// the given size.
class FeatureMaxPrivateElementSize<int size>
  : SubtargetFeature<"max-private-element-size-"#size,
                     "MaxPrivateElementSize",
                     !cast<string>(size),
                     "Maximum private access size may be "#size>;

def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;

def FeatureVGPRSpilling
  : SubtargetFeature<"vgpr-spilling", "EnableVGPRSpilling", "true",
                     "Enable spilling of VGPRs to scratch memory">;

// Two spellings ("DumpCode" and "dumpcode") of the same switch; both set the
// DumpCode attribute.
def FeatureDumpCode
  : SubtargetFeature<"DumpCode", "DumpCode", "true",
                     "Dump MachineInstrs in the CodeEmitter">;

def FeatureDumpCodeLower
  : SubtargetFeature<"dumpcode", "DumpCode", "true",
                     "Dump MachineInstrs in the CodeEmitter">;

def FeaturePromoteAlloca
  : SubtargetFeature<"promote-alloca", "EnablePromoteAlloca", "true",
                     "Enable promote alloca pass">;

// XXX - Probably worth removing once this is enabled by default.
def FeatureEnableLoadStoreOpt
  : SubtargetFeature<"load-store-opt", "EnableLoadStoreOpt", "true",
                     "Enable SI load/store optimizer pass">;

// Performance debugging aid. Lets DS instruction immediate offsets be used
// even when the conservative base pointer test fails. On SI, a base pointer
// value that would not give the same result as a 16-bit add is not safe to
// fold; this feature overrides that conservative test.
def FeatureEnableUnsafeDSOffsetFolding
  : SubtargetFeature<"unsafe-ds-offset-folding",
                     "EnableUnsafeDSOffsetFolding", "true",
                     "Force using DS instruction immediate offsets on SI">;

def FeatureEnableSIScheduler
  : SubtargetFeature<"si-scheduler", "EnableSIScheduler", "true",
                     "Enable SI Machine Scheduler">;

def FeatureFlatForGlobal
  : SubtargetFeature<"flat-for-global", "FlatForGlobal", "true",
                     "Force to generate flat instruction for global">;

// Placeholder feature (empty name) used to disable assembler instructions.
def FeatureDisable
  : SubtargetFeature<"", "FeatureDisable", "true",
                     "Dummy feature to disable assembler instructions">;

// A GPU generation feature: named Value, sets Gen to AMDGPUSubtarget::<Value>
// and pulls in every feature listed in Implies.
class SubtargetFeatureGeneration<string Value,
                                 list<SubtargetFeature> Implies>
  : SubtargetFeature<Value, "Gen", "AMDGPUSubtarget::"#Value,
                     Value#" GPU generation", Implies>;

def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;

def FeatureR600 : SubtargetFeatureGeneration<"R600", [
  FeatureR600ALUInst,
  FeatureFetchLimit8,
  FeatureLocalMemorySize0
]>;

def FeatureR700 : SubtargetFeatureGeneration<"R700", [
  FeatureFetchLimit16,
  FeatureLocalMemorySize0
]>;

def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN", [
  FeatureFetchLimit16,
  FeatureLocalMemorySize32768
]>;

def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS", [
  FeatureFetchLimit16,
  FeatureWavefrontSize64,
  FeatureLocalMemorySize32768
]>;

def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS", [
  FeatureFP64,
  FeatureLocalMemorySize32768,
  FeatureWavefrontSize64,
  FeatureGCN,
  FeatureGCN1Encoding,
  FeatureLDSBankCount32
]>;

def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS", [
  FeatureFP64,
  FeatureLocalMemorySize65536,
  FeatureWavefrontSize64,
  FeatureGCN,
  FeatureFlatAddressSpace,
  FeatureGCN1Encoding,
  FeatureCIInsts
]>;

def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", [
  FeatureFP64,
  FeatureLocalMemorySize65536,
  FeatureWavefrontSize64,
  FeatureFlatAddressSpace,
  FeatureGCN,
  FeatureGCN3Encoding,
  FeatureCIInsts,
  Feature16BitInsts,
  FeatureSMemRealTime
]>;

//===----------------------------------------------------------------------===//
// Debugger related subtarget features.
//===----------------------------------------------------------------------===//

def FeatureDebuggerInsertNops
  : SubtargetFeature<"amdgpu-debugger-insert-nops",
                     "DebuggerInsertNops", "true",
                     "Insert one nop instruction for each high level source statement">;

def FeatureDebuggerReserveRegs
  : SubtargetFeature<"amdgpu-debugger-reserve-regs",
                     "DebuggerReserveRegs", "true",
                     "Reserve registers for debugger usage">;

def FeatureDebuggerEmitPrologue
  : SubtargetFeature<"amdgpu-debugger-emit-prologue",
                     "DebuggerEmitPrologue", "true",
                     "Emit debugger prologue">;

//===----------------------------------------------------------------------===//

def AMDGPUInstrInfo : InstrInfo {
  let guessInstructionProperties = 1;
  let noNamedPositionallyEncodedOperands = 1;
}

def AMDGPUAsmParser : AsmParser {
  // Register-name matching is switched off: several R600 registers share one
  // asm name (T0_XYZW and T0_XY are both "T0", for instance), and emitting
  // the matcher crashes on that.
  let ShouldEmitMatchRegisterName = 0;
}

def AMDGPU : Target {
  // Instruction info and assembly parser used by this target.
  let InstructionSet = AMDGPUInstrInfo;
  let AssemblyParsers = [AMDGPUAsmParser];
}

// Placeholder instruction itineraries for pseudo instructions.
def ALU_NULL : FuncUnit;
def NullALU : InstrItinClass;

//===----------------------------------------------------------------------===//
// Predicate helper class
//===----------------------------------------------------------------------===//

def TruePredicate : Predicate<"true">;

// Each predicate below pairs a C++ condition on the subtarget with the
// assembler feature named in its AssemblerPredicate.
def isSICI
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
              "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
    AssemblerPredicate<"FeatureGCN1Encoding">;

def isVI
  : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
    AssemblerPredicate<"FeatureGCN3Encoding">;

def isCIVI
  : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
              "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS">,
    AssemblerPredicate<"FeatureCIInsts">;

def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;

// Mix-in that gathers the predicate fields into one Predicates list:
// [SubtargetPredicate, AssemblerPredicate] followed by AssemblerPredicates
// and then OtherPredicates.
class PredicateControl {
  Predicate SubtargetPredicate;
  Predicate SIAssemblerPredicate = isSICI;
  Predicate VIAssemblerPredicate = isVI;
  list<Predicate> AssemblerPredicates = [];
  Predicate AssemblerPredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates =
    !listconcat([SubtargetPredicate, AssemblerPredicate],
                AssemblerPredicates,
                OtherPredicates);
}

// Remaining AMDGPU TableGen files.
include "R600Schedule.td"
include "SISchedule.td"
include "Processors.td"
include "AMDGPUInstrInfo.td"
include "AMDGPUIntrinsics.td"
include "AMDGPURegisterInfo.td"
include "AMDGPUInstructions.td"
include "AMDGPUCallingConv.td"