//===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Declarations that describe the SI registers
//===----------------------------------------------------------------------===//

// Base class for an individual SI register.
//   n      - register name passed through to Register<n>.
//   regIdx - value stored in HWEncoding (defaults to 0).
// The DWARF register number is derived from HWEncoding.
class SIReg <string n, bits<16> regIdx = 0> : Register<n>,
  DwarfRegNum<[!cast<int>(HWEncoding)]> {
  let Namespace = "AMDGPU";

  // This is not yet the complete register encoding. An additional
  // bit is set for VGPRs.
  let HWEncoding = regIdx;
}

// Special Registers
def VCC_LO : SIReg<"vcc_lo", 106>;
def VCC_HI : SIReg<"vcc_hi", 107>;

// VCC for 64-bit instructions (sub0 = VCC_LO, sub1 = VCC_HI).
def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
          DwarfRegAlias<VCC_LO> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1];
  let HWEncoding = 106;
}

def EXEC_LO : SIReg<"exec_lo", 126>;
def EXEC_HI : SIReg<"exec_hi", 127>;

// EXEC register pair (sub0 = EXEC_LO, sub1 = EXEC_HI).
// NOTE(review): the name here is upper-case "EXEC" while the other special
// registers use lower-case names -- confirm this is intentional.
def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
           DwarfRegAlias<EXEC_LO> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1];
  let HWEncoding = 126;
}

def SCC : SIReg<"scc", 253>;
def M0 : SIReg <"m0", 124>;

// Trap handler registers
def TBA_LO : SIReg<"tba_lo", 108>;
def TBA_HI : SIReg<"tba_hi", 109>;

// TBA register pair (sub0 = TBA_LO, sub1 = TBA_HI).
def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
          DwarfRegAlias<TBA_LO> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1];
  let HWEncoding = 108;
}

def TMA_LO : SIReg<"tma_lo", 110>;
def TMA_HI : SIReg<"tma_hi", 111>;

// TMA register pair (sub0 = TMA_LO, sub1 = TMA_HI).
def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
          DwarfRegAlias<TMA_LO> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1];
  let HWEncoding = 110;
}

// TTMP0-TTMP11, encodings 112-123.
def TTMP0 : SIReg <"ttmp0", 112>;
def TTMP1 : SIReg <"ttmp1", 113>;
def TTMP2 : SIReg <"ttmp2", 114>;
def TTMP3 : SIReg <"ttmp3", 115>;
def TTMP4 : SIReg <"ttmp4", 116>;
def TTMP5 : SIReg <"ttmp5", 117>;
def TTMP6 : SIReg <"ttmp6", 118>;
def TTMP7 : SIReg <"ttmp7", 119>;
def TTMP8 : SIReg <"ttmp8", 120>;
def TTMP9 : SIReg <"ttmp9", 121>;
def TTMP10 : SIReg <"ttmp10", 122>;
def TTMP11 : SIReg <"ttmp11", 123>;

// Defines three records for one half of flat_scratch:
//   <NAME>_ci - name n, encoding ci_e
//   <NAME>_vi - name n, encoding vi_e
//   <NAME>    - empty name, encoding 0
// NOTE(review): the unsuffixed record looks like an encoding-independent
// placeholder that is mapped to the _ci/_vi variant elsewhere -- confirm.
multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
  def _ci : SIReg<n, ci_e>;
  def _vi : SIReg<n, vi_e>;
  def "" : SIReg<"", 0>;
}

// flat_scratch register pair built from the given lo/hi halves
// (sub0 = lo, sub1 = hi) with the given hardware encoding.
class FlatReg <Register lo, Register hi, bits<16> encoding> :
    RegisterWithSubRegs<"flat_scratch", [lo, hi]>,
    DwarfRegAlias<lo> {
  let Namespace = "AMDGPU";
  let SubRegIndices = [sub0, sub1];
  let HWEncoding = encoding;
}

defm FLAT_SCR_LO : FLAT_SCR_LOHI_m<"flat_scratch_lo", 104, 102>; // Offset in units of 256-bytes.
defm FLAT_SCR_HI : FLAT_SCR_LOHI_m<"flat_scratch_hi", 105, 103>; // Size is the per-thread scratch size, in bytes.

def FLAT_SCR_ci : FlatReg<FLAT_SCR_LO_ci, FLAT_SCR_HI_ci, 104>;
def FLAT_SCR_vi : FlatReg<FLAT_SCR_LO_vi, FLAT_SCR_HI_vi, 102>;
def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;

// SGPR registers: SGPR0-SGPR103, HWEncoding equal to the register index.
foreach Index = 0-103 in {
  def SGPR#Index : SIReg <"SGPR"#Index, Index>;
}

// VGPR registers: VGPR0-VGPR255. HWEncoding is the register index with
// bit 8 additionally set.
foreach Index = 0-255 in {
  def VGPR#Index : SIReg <"VGPR"#Index, Index> {
    let HWEncoding{8} = 1;
  }
}

//===----------------------------------------------------------------------===//
//  Groupings using register classes and tuples
//===----------------------------------------------------------------------===//

// Non-allocatable class containing only SCC. Per Target.td, a negative
// CopyCost means copying is extremely expensive or impossible.
def SCC_CLASS : RegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
  let CopyCost = -1;
  let isAllocatable = 0;
}

// TODO: Do we need to set DwarfRegAlias on register tuples?
//===----------------------------------------------------------------------===//
//  SGPR, TTMP and VGPR register classes and tuples
//===----------------------------------------------------------------------===//

// SGPR 32-bit registers
def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
                            (add (sequence "SGPR%u", 0, 103))> {
  let AllocationPriority = 1;
}

// SGPR 64-bit registers
// (decimate ..., 2) keeps every 2nd element, so each tuple starts at an
// even-numbered SGPR.
def SGPR_64Regs : RegisterTuples<[sub0, sub1],
                                 [(add (decimate SGPR_32, 2)),
                                  (add (decimate (shl SGPR_32, 1), 2))]>;

// SGPR 128-bit registers
// (decimate ..., 4) keeps every 4th element, so each tuple starts at an SGPR
// whose index is a multiple of 4. The same holds for the 256- and 512-bit
// tuples below.
def SGPR_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
                                  [(add (decimate SGPR_32, 4)),
                                   (add (decimate (shl SGPR_32, 1), 4)),
                                   (add (decimate (shl SGPR_32, 2), 4)),
                                   (add (decimate (shl SGPR_32, 3), 4))]>;

// SGPR 256-bit registers
def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
                              [(add (decimate SGPR_32, 4)),
                               (add (decimate (shl SGPR_32, 1), 4)),
                               (add (decimate (shl SGPR_32, 2), 4)),
                               (add (decimate (shl SGPR_32, 3), 4)),
                               (add (decimate (shl SGPR_32, 4), 4)),
                               (add (decimate (shl SGPR_32, 5), 4)),
                               (add (decimate (shl SGPR_32, 6), 4)),
                               (add (decimate (shl SGPR_32, 7), 4))]>;

// SGPR 512-bit registers
def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
                               sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
                              [(add (decimate SGPR_32, 4)),
                               (add (decimate (shl SGPR_32, 1), 4)),
                               (add (decimate (shl SGPR_32, 2), 4)),
                               (add (decimate (shl SGPR_32, 3), 4)),
                               (add (decimate (shl SGPR_32, 4), 4)),
                               (add (decimate (shl SGPR_32, 5), 4)),
                               (add (decimate (shl SGPR_32, 6), 4)),
                               (add (decimate (shl SGPR_32, 7), 4)),
                               (add (decimate (shl SGPR_32, 8), 4)),
                               (add (decimate (shl SGPR_32, 9), 4)),
                               (add (decimate (shl SGPR_32, 10), 4)),
                               (add (decimate (shl SGPR_32, 11), 4)),
                               (add (decimate (shl SGPR_32, 12), 4)),
                               (add (decimate (shl SGPR_32, 13), 4)),
                               (add (decimate (shl SGPR_32, 14), 4)),
                               (add (decimate (shl SGPR_32, 15), 4))]>;

// Trap handler TMP 32-bit registers
def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
                            (add (sequence "TTMP%u", 0, 11))> {
  let isAllocatable = 0;
}

// Trap handler TMP 64-bit registers
def TTMP_64Regs : RegisterTuples<[sub0, sub1],
                                 [(add (decimate TTMP_32, 2)),
                                  (add (decimate (shl TTMP_32, 1), 2))]>;

// Trap handler TMP 128-bit registers
def TTMP_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
                                  [(add (decimate TTMP_32, 4)),
                                   (add (decimate (shl TTMP_32, 1), 4)),
                                   (add (decimate (shl TTMP_32, 2), 4)),
                                   (add (decimate (shl TTMP_32, 3), 4))]>;

// VGPR 32-bit registers
def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
                            (add (sequence "VGPR%u", 0, 255))> {
  let AllocationPriority = 1;
}

// VGPR 64-bit registers
// No decimate is applied to the VGPR tuples, so a tuple may start at any
// VGPR. trunc limits sub0 so that the last sub-register stays within the
// 256 registers of VGPR_32.
def VGPR_64 : RegisterTuples<[sub0, sub1],
                             [(add (trunc VGPR_32, 255)),
                              (add (shl VGPR_32, 1))]>;

// VGPR 96-bit registers
def VGPR_96 : RegisterTuples<[sub0, sub1, sub2],
                             [(add (trunc VGPR_32, 254)),
                              (add (shl VGPR_32, 1)),
                              (add (shl VGPR_32, 2))]>;

// VGPR 128-bit registers
def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
                              [(add (trunc VGPR_32, 253)),
                               (add (shl VGPR_32, 1)),
                               (add (shl VGPR_32, 2)),
                               (add (shl VGPR_32, 3))]>;

// VGPR 256-bit registers
def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
                              [(add (trunc VGPR_32, 249)),
                               (add (shl VGPR_32, 1)),
                               (add (shl VGPR_32, 2)),
                               (add (shl VGPR_32, 3)),
                               (add (shl VGPR_32, 4)),
                               (add (shl VGPR_32, 5)),
                               (add (shl VGPR_32, 6)),
                               (add (shl VGPR_32, 7))]>;

// VGPR 512-bit registers
def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
                               sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
                              [(add (trunc VGPR_32, 241)),
                               (add (shl VGPR_32, 1)),
                               (add (shl VGPR_32, 2)),
                               (add (shl VGPR_32, 3)),
                               (add (shl VGPR_32, 4)),
                               (add (shl VGPR_32, 5)),
                               (add (shl VGPR_32, 6)),
                               (add (shl VGPR_32, 7)),
                               (add (shl VGPR_32, 8)),
                               (add (shl VGPR_32, 9)),
                               (add (shl VGPR_32, 10)),
                               (add (shl VGPR_32, 11)),
                               (add (shl VGPR_32, 12)),
                               (add (shl VGPR_32, 13)),
                               (add (shl VGPR_32, 14)),
                               (add (shl VGPR_32, 15))]>;

//===----------------------------------------------------------------------===//
//  Register classes used as source and destination
//===----------------------------------------------------------------------===//

// Asm operand class named `name` that renders its operand with
// "addRegOrImmOperands".
class RegImmMatcher<string name> : AsmOperandClass {
  let Name = name;
  let RenderMethod = "addRegOrImmOperands";
}

// Subset of SReg_32 without M0 for SMRD instructions and the like.
// See comments in SIInstructions.td for more info.
def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32], 32,
  (add SGPR_32, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
   TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)> {
  let AllocationPriority = 1;
}

// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
  (add SReg_32_XM0, M0)> {
  let AllocationPriority = 1;
}

def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)> {
  let AllocationPriority = 2;
}

def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add TTMP_64Regs)> {
  let isAllocatable = 0;
}

// All 64-bit scalar registers: SGPR pairs plus the VCC, EXEC, FLAT_SCR,
// TTMP, TBA and TMA pairs.
def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
  (add SGPR_64, VCC, EXEC, FLAT_SCR, TTMP_64, TBA, TMA)> {
  let AllocationPriority = 2;
}

// Requires 2 s_mov_b64 to copy
let CopyCost = 2 in {

def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128Regs)> {
  let AllocationPriority = 4;
}

def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128Regs)> {
  let isAllocatable = 0;
}

def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128, TTMP_128)> {
  let AllocationPriority = 4;
}

} // End CopyCost = 2

def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
  // Requires 4 s_mov_b64 to copy
  let CopyCost = 4;
  let AllocationPriority = 5;
}

def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 32, (add SGPR_512)> {
  // Requires 8 s_mov_b64 to copy
  let CopyCost = 8;
  let AllocationPriority = 6;
}

// Register class for all vector registers (VGPRs + Interpolation Registers)
def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 32, (add VGPR_64)> {
  // Requires 2 v_mov_b32 to copy
  let CopyCost = 2;
  let AllocationPriority = 2;
}

def VReg_96 : RegisterClass<"AMDGPU", [untyped], 32, (add VGPR_96)> {
  // The class is untyped, so its size is given explicitly.
  let Size = 96;

  // Requires 3 v_mov_b32 to copy
  let CopyCost = 3;
  let AllocationPriority = 3;
}

def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VGPR_128)> {
  // Requires 4 v_mov_b32 to copy
  let CopyCost = 4;
  let AllocationPriority = 4;
}

def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> {
  // NOTE(review): presumably 8 v_mov_b32 to copy, following the pattern of
  // the smaller VReg classes -- confirm.
  let CopyCost = 8;
  let AllocationPriority = 5;
}

def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> {
  // NOTE(review): presumably 16 v_mov_b32 to copy, following the pattern of
  // the smaller VReg classes -- confirm.
  let CopyCost = 16;
  let AllocationPriority = 6;
}

// i1 values held in the 32-bit VGPRs; Size is set explicitly to 32.
def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
  let Size = 32;
}

// Register operand over class rc tagged with operand type OPERAND_REG_IMM32.
class RegImmOperand <RegisterClass rc> : RegisterOperand<rc> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_IMM32";
}

// Register operand over class rc tagged with operand type
// OPERAND_REG_INLINE_C.
class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_INLINE_C";
}

//===----------------------------------------------------------------------===//
//  SSrc_* Operands with an SGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//

def SSrc_32 : RegImmOperand<SReg_32> {
  let ParserMatchClass = RegImmMatcher<"SSrc32">;
}

def SSrc_64 : RegImmOperand<SReg_64> {
  let ParserMatchClass = RegImmMatcher<"SSrc64">;
}

//===----------------------------------------------------------------------===//
//  SCSrc_* Operands with an SGPR or an inline constant
//===----------------------------------------------------------------------===//

def SCSrc_32 : RegInlineOperand<SReg_32> {
  let ParserMatchClass = RegImmMatcher<"SCSrc32">;
}

//===----------------------------------------------------------------------===//
//  VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//

// Union of the 32-bit vector and scalar register classes.
def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;

// Union of the 64-bit vector and scalar register classes.
def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
  let CopyCost = 2;
}

def VSrc_32 : RegisterOperand<VS_32> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_IMM32";
  let ParserMatchClass = RegImmMatcher<"VSrc32">;
}

def VSrc_64 : RegisterOperand<VS_64> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_IMM32";
  let ParserMatchClass = RegImmMatcher<"VSrc64">;
}

//===----------------------------------------------------------------------===//
//  VCSrc_* Operands with an SGPR, VGPR or an inline constant
//===----------------------------------------------------------------------===//

def VCSrc_32 : RegisterOperand<VS_32> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_INLINE_C";
  let ParserMatchClass = RegImmMatcher<"VCSrc32">;
}

def VCSrc_64 : RegisterOperand<VS_64> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_INLINE_C";
  let ParserMatchClass = RegImmMatcher<"VCSrc64">;
}

//===----------------------------------------------------------------------===//
//  SCSrc_* Operands with an SGPR or an inline constant
//===----------------------------------------------------------------------===//

def SCSrc_64 : RegisterOperand<SReg_64> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_INLINE_C";
  let ParserMatchClass = RegImmMatcher<"SCSrc64">;
}