• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1//===-- SIRegisterInfo.td - SI Register defs ---------------*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9//===----------------------------------------------------------------------===//
10//  Subregister declarations
11//===----------------------------------------------------------------------===//
12
// Helper that provides integer index lists for generating sub-register
// indices. `all` is the fixed list 0..31; `slice` is the prefix of `all`
// whose entries are strictly below N.
13class Indexes<int N> {
14  list<int> all = [0,   1,  2,  3,  4,  5,  6 , 7,
15                   8,   9, 10, 11, 12, 13, 14, 15,
16                   16, 17, 18, 19, 20, 21, 22, 23,
17                   24, 25, 26, 27, 28, 29, 30, 31];
18
19  // Returns the list of indexes [0..N). Because `all` stops at 31, any N
  // above 32 still yields at most the 32 entries 0..31.
20  list<int> slice = !filter(i, all, !lt(i, N));
21}
22
// Every sub-register index defined in this block gets Namespace = "AMDGPU".
23let Namespace = "AMDGPU" in {
24
// The two 16-bit halves of a 32-bit register: SubRegIndex<16, 0> and
// SubRegIndex<16, 16> (presumably <size in bits, bit offset> as declared by
// SubRegIndex in Target.td -- TODO confirm against that file).
25def lo16 : SubRegIndex<16, 0>;
26def hi16 : SubRegIndex<16, 16>;
27
// sub0 .. sub31: one 32-bit index per dword; the second argument is
// Index << 5, i.e. Index * 32.
28foreach Index = 0...31 in {
29  def sub#Index : SubRegIndex<32, !shl(Index, 5)>;
30}
31
// sub<N>_lo16 / sub<N>_hi16: composition of sub<N> with lo16 / hi16.
// The range starts at 1, so no sub0_lo16 / sub0_hi16 records are created here.
32foreach Index = 1...31 in {
33  def sub#Index#_lo16 : ComposedSubRegIndex<!cast<SubRegIndex>(sub#Index), lo16>;
34  def sub#Index#_hi16 : ComposedSubRegIndex<!cast<SubRegIndex>(sub#Index), hi16>;
35}
36
// Multi-dword indices for tuple sizes 2, 3, 4, 5, 6, 8 and 16.
// For each Size, Index runs over [0, 33 - Size), so the last tuple ends at
// sub31. The record name is the component names joined with "_", e.g.
// Size = 2, Index = 0 gives "sub0_sub1". Each record has size Size * 32 and
// second argument Index * 32, and lists its 32-bit components in
// CoveringSubRegIndices.
37foreach Size = {2...6,8,16} in {
38  foreach Index = Indexes<!sub(33, Size)>.slice in {
39    def !interleave(!foreach(cur, Indexes<Size>.slice, "sub"#!add(cur, Index)),
40                    "_") :
41      SubRegIndex<!mul(Size, 32), !shl(Index, 5)> {
42      let CoveringSubRegIndices =
43        !foreach(cur, Indexes<Size>.slice,
44                 !cast<SubRegIndex>(sub#!add(cur, Index)));
45    }
46  }
47}
48
49}
50
51//===----------------------------------------------------------------------===//
52//  Helpers
53//===----------------------------------------------------------------------===//
54
// Maps a tuple width, counted in 32-bit registers, to the matching list of
// 32-bit sub-register indices [sub0 .. sub<size-1>], exposed as `ret`.
//
// Explicit lists exist for sizes 2 through 12, 16 and 32. Any other `size`
// selects the 32-entry list, because that is the default arm of the
// selection at the bottom of the class.
class getSubRegs<int size> {
  list<SubRegIndex> ret2 = [sub0, sub1];
  list<SubRegIndex> ret3 = [sub0, sub1, sub2];
  list<SubRegIndex> ret4 = [sub0, sub1, sub2, sub3];
  list<SubRegIndex> ret5 = [sub0, sub1, sub2, sub3,
                            sub4];
  list<SubRegIndex> ret6 = [sub0, sub1, sub2, sub3,
                            sub4, sub5];
  list<SubRegIndex> ret7 = [sub0, sub1, sub2, sub3,
                            sub4, sub5, sub6];
  list<SubRegIndex> ret8 = [sub0, sub1, sub2, sub3,
                            sub4, sub5, sub6, sub7];
  list<SubRegIndex> ret9 = [sub0, sub1, sub2, sub3,
                            sub4, sub5, sub6, sub7,
                            sub8];
  list<SubRegIndex> ret10 = [sub0, sub1, sub2, sub3,
                             sub4, sub5, sub6, sub7,
                             sub8, sub9];
  list<SubRegIndex> ret11 = [sub0, sub1, sub2, sub3,
                             sub4, sub5, sub6, sub7,
                             sub8, sub9, sub10];
  list<SubRegIndex> ret12 = [sub0, sub1, sub2, sub3,
                             sub4, sub5, sub6, sub7,
                             sub8, sub9, sub10, sub11];
  list<SubRegIndex> ret16 = [sub0, sub1, sub2, sub3,
                             sub4, sub5, sub6, sub7,
                             sub8, sub9, sub10, sub11,
                             sub12, sub13, sub14, sub15];
  list<SubRegIndex> ret32 = [sub0, sub1, sub2, sub3,
                             sub4, sub5, sub6, sub7,
                             sub8, sub9, sub10, sub11,
                             sub12, sub13, sub14, sub15,
                             sub16, sub17, sub18, sub19,
                             sub20, sub21, sub22, sub23,
                             sub24, sub25, sub26, sub27,
                             sub28, sub29, sub30, sub31];

  // Flat selection table; the trailing `true` arm is the fall-through.
  list<SubRegIndex> ret = !cond(!eq(size, 2):  ret2,
                                !eq(size, 3):  ret3,
                                !eq(size, 4):  ret4,
                                !eq(size, 5):  ret5,
                                !eq(size, 6):  ret6,
                                !eq(size, 7):  ret7,
                                !eq(size, 8):  ret8,
                                !eq(size, 9):  ret9,
                                !eq(size, 10): ret10,
                                !eq(size, 11): ret11,
                                !eq(size, 12): ret12,
                                !eq(size, 16): ret16,
                                true:          ret32);
}
100
101// Generates list of sequential register tuple names.
102// E.g. RegSeqNames<3,2,2,"s">.ret -> [ "s[0:1]", "s[2:3]" ]
//
// Parameters:
//   last_reg - highest register number a tuple may end on (inclusive).
//   stride   - distance between the first registers of consecutive tuples.
//   size     - number of registers in each tuple.
//   prefix   - text placed before the "[first:last]" range.
//   start    - first register of the current tuple; callers normally leave
//              the default 0, the class passes it on when recursing.
103class RegSeqNames<int last_reg, int stride, int size, string prefix,
104                  int start = 0> {
  // First register of the following tuple.
105  int next = !add(start, stride);
  // Last register of the current tuple: start + size - 1.
106  int end_reg = !add(start, size, -1);
  // Emit the current name and recurse while the tuple still fits; the
  // recursion ends with [] once end_reg would exceed last_reg.
107  list<string> ret =
108    !if(!le(end_reg, last_reg),
109        !listconcat([prefix # "[" # start # ":" # end_reg # "]"],
110                    RegSeqNames<last_reg, stride, size, prefix, next>.ret),
111                    []);
112}
113
114// Generates list of dags for register tuples.
// Produces `size` dags, one per sub-register position (start = 0 .. size-1).
// Each dag is (add (decimate (shl trunc_rc, start), stride)); trunc, shl and
// decimate are dag operators defined outside this section.
115class RegSeqDags<RegisterClass RC, int last_reg, int stride, int size,
116                int start = 0> {
  // Truncation length applied to RC: last_reg + 2 - size when stride is 1
  // and this is the position-0 instantiation, otherwise last_reg + 1.
  // Note this is re-evaluated in every recursive instantiation, so the
  // shorter length only ever applies to the first dag.
117  dag trunc_rc = (trunc RC,
118                  !if(!and(!eq(stride, 1), !eq(start, 0)),
119                      !sub(!add(last_reg, 2), size),
120                      !add(last_reg, 1)));
  // Current position's dag followed by the dags for positions start+1..size-1.
121  list<dag> ret =
122    !if(!lt(start, size),
123        !listconcat([(add (decimate (shl trunc_rc, start), stride))],
124                    RegSeqDags<RC, last_reg, stride, size, !add(start, 1)>.ret),
125        []);
126}
127
// Convenience wrapper around RegisterTuples: forwards Indices unchanged and
// derives the per-position dags from RegSeqDags and the tuple names from
// RegSeqNames, both with their default start of 0.
128class SIRegisterTuples<list<SubRegIndex> Indices, RegisterClass RC,
129                       int last_reg, int stride, int size, string prefix> :
130  RegisterTuples<Indices,
131                 RegSeqDags<RC, last_reg, stride, size>.ret,
132                 RegSeqNames<last_reg, stride, size, prefix>.ret>;
133
134//===----------------------------------------------------------------------===//
135//  Declarations that describe the SI registers
136//===----------------------------------------------------------------------===//
// Base class for a single AMDGPU register: name `n`, placed in the AMDGPU
// namespace, with HWEncoding set to `regIdx` (0 when the argument is omitted).
137class SIReg <string n, bits<16> regIdx = 0> :
138  Register<n> {
139  let Namespace = "AMDGPU";
140  let HWEncoding = regIdx;
141}
142
143// For register classes that use TSFlags.
// Adds three one-bit fields, all defaulting to 0, and copies them into
// TSFlags bits 0 (HasVGPR), 1 (HasAGPR) and 2 (HasSGPR).
144class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
145  : RegisterClass <n, rTypes, Align, rList> {
146  // For vector register classes.
147  field bit HasVGPR = 0;
148  field bit HasAGPR = 0;
149
150  // For scalar register classes.
151  field bit HasSGPR = 0;
152
153  // These need to be kept in sync with the enum SIRCFlags.
154  let TSFlags{0} = HasVGPR;
155  let TSFlags{1} = HasAGPR;
156  let TSFlags{2} = HasSGPR;
157}
158
// Defines a 32-bit register together with its two 16-bit halves.
// For `defm X : SIRegLoHi16<...>` this creates X_LO16, X_HI16 and X.
//
// Parameters:
//   n              - assembly name of the 32-bit register.
//   regIdx         - value assigned to HWEncoding on all three records.
//   ArtificialHigh - when 1 (the default) the high half gets an empty name
//                    and isArtificial = 1; when 0 it is named n#".h".
//   HWEncodingHigh - value written to bit 8 of HWEncoding on all three
//                    records (default 0).
159multiclass SIRegLoHi16 <string n, bits<16> regIdx, bit ArtificialHigh = 1,
160                        bit HWEncodingHigh = 0> {
161  // There is no special encoding for 16 bit subregs, these are not real
162  // registers but rather operands for instructions preserving other 16 bits
163  // of the result or reading just 16 bits of a 32 bit VGPR.
164  // It is encoded as a corresponding 32 bit register.
165  // Non-VGPR register classes use it as we need to have matching subregisters
166  // to move instructions and data between ALUs.
167  def _LO16 : SIReg<n#".l", regIdx> {
168    let HWEncoding{8} = HWEncodingHigh;
169  }
170  def _HI16 : SIReg<!if(ArtificialHigh, "", n#".h"), regIdx> {
171    let isArtificial = ArtificialHigh;
172    let HWEncoding{8} = HWEncodingHigh;
173  }
  // The 32-bit register itself, with the halves attached as lo16 / hi16.
  // CoveredBySubRegs is the negation of ArtificialHigh.
174  def "" : RegisterWithSubRegs<n, [!cast<Register>(NAME#"_LO16"),
175                                   !cast<Register>(NAME#"_HI16")]> {
176    let Namespace = "AMDGPU";
177    let SubRegIndices = [lo16, hi16];
178    let CoveredBySubRegs = !not(ArtificialHigh);
179    let HWEncoding = regIdx;
    // Applied after the full assignment above so bit 8 is not overwritten.
180    let HWEncoding{8} = HWEncodingHigh;
181  }
182}
183
184// Special Registers
// 32-bit halves of VCC with hardware encodings 106 and 107. Each defm also
// creates the corresponding _LO16 / _HI16 records through SIRegLoHi16.
185defm VCC_LO : SIRegLoHi16<"vcc_lo", 106>;
186defm VCC_HI : SIRegLoHi16<"vcc_hi", 107>;
187
188// Pseudo-registers: Used as placeholders during isel and immediately
189// replaced, never seeing the verifier.
// All three are given hardware encoding 0.
190def PRIVATE_RSRC_REG : SIReg<"private_rsrc", 0>;
191def FP_REG : SIReg<"fp", 0>;
192def SP_REG : SIReg<"sp", 0>;
193
194// Pseudo-register to represent the program-counter DWARF register.
// Both entries of the DwarfRegNum list are 16; hardware encoding is 0.
195def PC_REG : SIReg<"pc", 0>, DwarfRegNum<[16, 16]> {
196  // There is no physical register corresponding to a "program counter", but
197  // we need to encode the concept in debug information in order to represent
198  // things like the return value in unwind information.
199  let isArtificial = 1;
200}
201
202// VCC for 64-bit instructions
// Built from VCC_LO (sub0) and VCC_HI (sub1); reuses VCC_LO's HWEncoding.
203def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
204  let Namespace = "AMDGPU";
205  let SubRegIndices = [sub0, sub1];
206  let HWEncoding = VCC_LO.HWEncoding;
207}
208
// EXEC halves with hardware encodings 126 and 127. Only EXEC_LO carries a
// DwarfRegNum ([1, 1]); EXEC_HI has none.
209defm EXEC_LO : SIRegLoHi16<"exec_lo", 126>, DwarfRegNum<[1, 1]>;
210defm EXEC_HI : SIRegLoHi16<"exec_hi", 127>;
211
// 64-bit EXEC built from EXEC_LO (sub0) and EXEC_HI (sub1); reuses EXEC_LO's
// HWEncoding and has DwarfRegNum [17, 1].
212def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>, DwarfRegNum<[17, 1]> {
213  let Namespace = "AMDGPU";
214  let SubRegIndices = [sub0, sub1];
215  let HWEncoding = EXEC_LO.HWEncoding;
216}
217
218// 32-bit real registers, for MC only.
219// May be used with both 32-bit and 64-bit operands.
// Hardware encodings 251, 252 and 253 respectively.
220defm SRC_VCCZ : SIRegLoHi16<"src_vccz", 251>;
221defm SRC_EXECZ : SIRegLoHi16<"src_execz", 252>;
222defm SRC_SCC : SIRegLoHi16<"src_scc", 253>;
223
224// 1-bit pseudo register, for codegen only.
225// Should never be emitted.
// No encoding argument is passed, so SIReg's default regIdx of 0 applies.
226def SCC : SIReg<"scc">;
227
228// Encoding changes between subtarget generations.
229// See also Utils/AMDGPUBaseInfo.cpp MAP_REG2REG.
// Three records share the name "m0": encoding 124 (_gfxpre11), encoding 125
// (_gfx11plus) and a plain M0 with encoding 0.
230defm M0_gfxpre11 : SIRegLoHi16 <"m0", 124>;
231defm M0_gfx11plus : SIRegLoHi16 <"m0", 125>;
232defm M0 : SIRegLoHi16 <"m0", 0>;
233
// "null" register. The per-generation records use encoding 125 (_gfxpre11)
// and 124 (_gfx11plus) -- the reverse of the M0 assignment above. These two
// records are outside the isConstant block below.
234defm SGPR_NULL_gfxpre11 : SIRegLoHi16 <"null", 125>;
235defm SGPR_NULL_gfx11plus : SIRegLoHi16 <"null", 124>;
// Plain records with encoding 0, marked constant. SGPR_NULL_HI has an empty
// assembly name.
236let isConstant = true in {
237defm SGPR_NULL : SIRegLoHi16 <"null", 0>;
238defm SGPR_NULL_HI : SIRegLoHi16 <"", 0>;
239} // isConstant = true
240
// 64-bit "null" built from SGPR_NULL (sub0) and SGPR_NULL_HI (sub1); reuses
// SGPR_NULL's HWEncoding and is also marked constant.
241def SGPR_NULL64 :
242    RegisterWithSubRegs<"null", [SGPR_NULL, SGPR_NULL_HI]> {
243  let Namespace = "AMDGPU";
244  let SubRegIndices = [sub0, sub1];
245  let HWEncoding = SGPR_NULL.HWEncoding;
246  let isConstant = true;
247}
248
249// Aperture registers are 64 bit registers with a LO/HI 32 bit.
250// HI 32 bit cannot be used, and LO 32 is used by instructions
251// with 32 bit sources.
252//
253// Note that the low 32 bits are essentially useless as they
254// don't contain the lower 32 bits of the address - they are in
255// the high 32 bits. The lower 32 bits are always zero (for base) or
256// -1 (for limit). Since we cannot access the high 32 bits, when we
257// need them, we need to do a 64 bit load and extract the bits manually.
//
// For `defm X : ApertureRegister<name, regIdx>` this creates X_LO, X_HI
// (each with its own _LO16/_HI16 records via SIRegLoHi16) and the 64-bit X.
// All of them are marked isConstant.
258multiclass ApertureRegister<string name, bits<16> regIdx> {
259  let isConstant = true in {
260    // FIXME: We shouldn't need to define subregisters for these (nor add them to any 16 bit
261    //  register classes), but if we don't it seems to confuse the TableGen
262    //  backend and we end up with a lot of weird register pressure sets and classes.
    // _LO and _HI use the same regIdx; _HI has an empty assembly name.
263    defm _LO : SIRegLoHi16 <name, regIdx>;
264    defm _HI : SIRegLoHi16 <"", regIdx>;
265
    // 64-bit register: _LO is sub0, _HI is sub1, HWEncoding copied from _LO.
266    def "" : RegisterWithSubRegs<name, [!cast<Register>(NAME#_LO), !cast<Register>(NAME#_HI)]> {
267      let Namespace = "AMDGPU";
268      let SubRegIndices = [sub0, sub1];
269      let HWEncoding = !cast<Register>(NAME#_LO).HWEncoding;
270    }
271  } // isConstant = true
272}
273
// The four aperture registers, hardware encodings 235 through 238.
274defm SRC_SHARED_BASE   : ApertureRegister<"src_shared_base",   235>;
275defm SRC_SHARED_LIMIT  : ApertureRegister<"src_shared_limit",  236>;
276defm SRC_PRIVATE_BASE  : ApertureRegister<"src_private_base",  237>;
277defm SRC_PRIVATE_LIMIT : ApertureRegister<"src_private_limit", 238>;
278
// Plain 32-bit register (not an ApertureRegister), hardware encoding 239.
279defm SRC_POPS_EXITING_WAVE_ID : SIRegLoHi16<"src_pops_exiting_wave_id", 239>;
280
281// Not addressable
// Given hardware encoding 0.
282def MODE : SIReg <"mode", 0>;
283
// Source-operand encoding value 254, modelled as an artificial register.
284def LDS_DIRECT : SIReg <"src_lds_direct", 254> {
285  // There is no physical register corresponding to this. This is an
286  // encoding value in a source field, which will ultimately trigger a
287  // read from m0.
288  let isArtificial = 1;
289}
290
// XNACK mask halves with hardware encodings 104 and 105 (the same values
// used for the flat_scratch _ci records defined in this file).
291defm XNACK_MASK_LO : SIRegLoHi16<"xnack_mask_lo", 104>;
292defm XNACK_MASK_HI : SIRegLoHi16<"xnack_mask_hi", 105>;
293
// 64-bit register: XNACK_MASK_LO is sub0, XNACK_MASK_HI is sub1; HWEncoding
// is copied from XNACK_MASK_LO.
294def XNACK_MASK :
295    RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]> {
296  let Namespace = "AMDGPU";
297  let SubRegIndices = [sub0, sub1];
298  let HWEncoding = XNACK_MASK_LO.HWEncoding;
299}
300
301// Trap handler registers
// TBA halves: hardware encodings 108 and 109.
302defm TBA_LO : SIRegLoHi16<"tba_lo", 108>;
303defm TBA_HI : SIRegLoHi16<"tba_hi", 109>;
304
// 64-bit TBA: TBA_LO is sub0, TBA_HI is sub1; HWEncoding copied from TBA_LO.
305def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]> {
306  let Namespace = "AMDGPU";
307  let SubRegIndices = [sub0, sub1];
308  let HWEncoding = TBA_LO.HWEncoding;
309}
310
// TMA halves: hardware encodings 110 and 111.
311defm TMA_LO : SIRegLoHi16<"tma_lo", 110>;
312defm TMA_HI : SIRegLoHi16<"tma_hi", 111>;
313
// 64-bit TMA: TMA_LO is sub0, TMA_HI is sub1; HWEncoding copied from TMA_LO.
314def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]> {
315  let Namespace = "AMDGPU";
316  let SubRegIndices = [sub0, sub1];
317  let HWEncoding = TMA_LO.HWEncoding;
318}
319
// ttmp0 .. ttmp15. Each index gets three records sharing the name
// "ttmp<Index>": _vi with encoding 112 + Index, _gfx9plus with encoding
// 108 + Index, and a plain TTMP<Index> with encoding 0.
320foreach Index = 0...15 in {
321  defm TTMP#Index#_vi       : SIRegLoHi16<"ttmp"#Index, !add(112, Index)>;
322  defm TTMP#Index#_gfx9plus : SIRegLoHi16<"ttmp"#Index, !add(108, Index)>;
323  defm TTMP#Index           : SIRegLoHi16<"ttmp"#Index, 0>;
324}
325
// For `defm X : FLAT_SCR_LOHI_m<n, ci_e, vi_e>` creates three registers that
// share the name n: X_ci (encoding ci_e), X_vi (encoding vi_e) and a plain X
// with encoding 0.
326multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
327  defm _ci : SIRegLoHi16<n, ci_e>;
328  defm _vi : SIRegLoHi16<n, vi_e>;
329  defm "" : SIRegLoHi16<n, 0>;
330}
331
// 64-bit "flat_scratch" register made of `lo` (sub0) and `hi` (sub1) with an
// explicitly supplied HWEncoding (unlike the other 64-bit registers in this
// file, which copy the encoding of their low half).
332class FlatReg <Register lo, Register hi, bits<16> encoding> :
333    RegisterWithSubRegs<"flat_scratch", [lo, hi]> {
334  let Namespace = "AMDGPU";
335  let SubRegIndices = [sub0, sub1];
336  let HWEncoding = encoding;
337}
338
// flat_scratch halves: _ci encodings 104 / 105, _vi encodings 102 / 103.
339defm FLAT_SCR_LO : FLAT_SCR_LOHI_m<"flat_scratch_lo", 104, 102>; // Offset in units of 256-bytes.
340defm FLAT_SCR_HI : FLAT_SCR_LOHI_m<"flat_scratch_hi", 105, 103>; // Size is the per-thread scratch size, in bytes.
341
// 64-bit variants; the encoding passed matches the low half of each variant
// (104 for _ci, 102 for _vi, 0 for the plain record).
342def FLAT_SCR_ci : FlatReg<FLAT_SCR_LO_ci, FLAT_SCR_HI_ci, 104>;
343def FLAT_SCR_vi : FlatReg<FLAT_SCR_LO_vi, FLAT_SCR_HI_vi, 102>;
344def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
345
346// SGPR registers
// s0 .. s105, hardware encoding equal to Index. The DWARF number is
// Index + 32 for Index <= 63 and Index + 1024 otherwise; the same value is
// used for both entries of the DwarfRegNum list.
347foreach Index = 0...105 in {
348  defm SGPR#Index :
349     SIRegLoHi16 <"s"#Index, Index>,
350     DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)),
351                  !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]>;
352}
353
354// VGPR registers
// v0 .. v255. ArtificialHigh = 0, so the high half is a named, non-artificial
// register ("v<N>.h"); HWEncodingHigh = 1 sets bit 8 of HWEncoding.
// DwarfRegNum entries are Index + 2560 and Index + 1536.
355foreach Index = 0...255 in {
356  defm VGPR#Index :
357    SIRegLoHi16 <"v"#Index, Index, 0, 1>,
358    DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]>;
359}
360
361// AccVGPR registers
// a0 .. a255. ArtificialHigh = 1 (high half is artificial and unnamed) and
// HWEncodingHigh = 1 (bit 8 of HWEncoding set).
// DwarfRegNum entries are Index + 3072 and Index + 2048.
362foreach Index = 0...255 in {
363  defm AGPR#Index :
364      SIRegLoHi16 <"a"#Index, Index, 1, 1>,
365      DwarfRegNum<[!add(Index, 3072), !add(Index, 2048)]>;
366}
367
368//===----------------------------------------------------------------------===//
369//  Groupings using register classes and tuples
370//===----------------------------------------------------------------------===//
371
// Class containing only SCC, value type i1. Not allocatable, CopyCost -1,
// flagged as an SGPR class, BaseClassOrder 10000.
372def SCC_CLASS : SIRegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
373  let CopyCost = -1;
374  let isAllocatable = 0;
375  let HasSGPR = 1;
376  let BaseClassOrder = 10000;
377}
378
// Class containing only M0 (i32). Not allocatable, flagged as an SGPR class.
379def M0_CLASS : SIRegisterClass<"AMDGPU", [i32], 32, (add M0)> {
380  let CopyCost = 1;
381  let isAllocatable = 0;
382  let HasSGPR = 1;
383}
384
// 16-bit counterpart containing only M0_LO16 (i16 / f16), Size 16.
385def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> {
386  let CopyCost = 1;
387  let Size = 16;
388  let isAllocatable = 0;
389  let HasSGPR = 1;
390}
391
392// TODO: Do we need to set DwarfRegAlias on register tuples?
393
// Low 16-bit halves of s0 .. s105. No pressure set is generated.
394def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
395                              (add (sequence "SGPR%u_LO16", 0, 105))> {
396  let AllocationPriority = 0;
397  let Size = 16;
398  let GeneratePressureSet = 0;
399  let HasSGPR = 1;
400}
401
// High 16-bit halves of s0 .. s105. Unlike SGPR_LO16 this class is marked
// not allocatable; no pressure set is generated.
402def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
403                              (add (sequence "SGPR%u_HI16", 0, 105))> {
404  let isAllocatable = 0;
405  let Size = 16;
406  let GeneratePressureSet = 0;
407  let HasSGPR = 1;
408}
409
410// SGPR 32-bit registers
// Contains s0 .. s105. No pressure set is generated for this class.
411def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
412                            (add (sequence "SGPR%u", 0, 105))> {
413  // Give all SGPR classes higher priority than VGPR classes, because
414  // we want to spill SGPRs to VGPRs.
415  let AllocationPriority = 0;
416  let GeneratePressureSet = 0;
417  let HasSGPR = 1;
418}
419
// SGPR tuples. Arguments to SIRegisterTuples are
//   <sub-register indices, element class, last_reg, stride, size, prefix>.
// All use SGPR_32 with last_reg 105 and prefix "s". The stride is 2 for
// 64-bit tuples, 3 for 96-bit tuples and 4 for every wider tuple, so wider
// tuples always start on a multiple of 4.
420// SGPR 64-bit registers
421def SGPR_64Regs : SIRegisterTuples<getSubRegs<2>.ret, SGPR_32, 105, 2, 2, "s">;
422
423// SGPR 96-bit registers. No operations use these, but for symmetry with 96-bit VGPRs.
424def SGPR_96Regs : SIRegisterTuples<getSubRegs<3>.ret, SGPR_32, 105, 3, 3, "s">;
425
426// SGPR 128-bit registers
427def SGPR_128Regs : SIRegisterTuples<getSubRegs<4>.ret, SGPR_32, 105, 4, 4, "s">;
428
429// SGPR 160-bit registers. No operations use these, but for symmetry with 160-bit VGPRs.
430def SGPR_160Regs : SIRegisterTuples<getSubRegs<5>.ret, SGPR_32, 105, 4, 5, "s">;
431
432// SGPR 192-bit registers. No operations use these, but for symmetry with 192-bit VGPRs.
433def SGPR_192Regs : SIRegisterTuples<getSubRegs<6>.ret, SGPR_32, 105, 4, 6, "s">;
434
435// SGPR 224-bit registers. No operations use these, but for symmetry with 224-bit VGPRs.
436def SGPR_224Regs : SIRegisterTuples<getSubRegs<7>.ret, SGPR_32, 105, 4, 7, "s">;
437
438// SGPR 256-bit registers
439def SGPR_256Regs : SIRegisterTuples<getSubRegs<8>.ret, SGPR_32, 105, 4, 8, "s">;
440
441// SGPR 288-bit registers. No operations use these, but for symmetry with 288-bit VGPRs.
442def SGPR_288Regs : SIRegisterTuples<getSubRegs<9>.ret, SGPR_32, 105, 4, 9, "s">;
443
444// SGPR 320-bit registers. No operations use these, but for symmetry with 320-bit VGPRs.
445def SGPR_320Regs : SIRegisterTuples<getSubRegs<10>.ret, SGPR_32, 105, 4, 10, "s">;
446
447// SGPR 352-bit registers. No operations use these, but for symmetry with 352-bit VGPRs.
448def SGPR_352Regs : SIRegisterTuples<getSubRegs<11>.ret, SGPR_32, 105, 4, 11, "s">;
449
450// SGPR 384-bit registers. No operations use these, but for symmetry with 384-bit VGPRs.
451def SGPR_384Regs : SIRegisterTuples<getSubRegs<12>.ret, SGPR_32, 105, 4, 12, "s">;
452
453// SGPR 512-bit registers
454def SGPR_512Regs : SIRegisterTuples<getSubRegs<16>.ret, SGPR_32, 105, 4, 16, "s">;
455
456// SGPR 1024-bit registers
457def SGPR_1024Regs : SIRegisterTuples<getSubRegs<32>.ret, SGPR_32, 105, 4, 32, "s">;
458
459// Trap handler TMP 32-bit registers
// Contains the plain TTMP0 .. TTMP15 records (not the _vi / _gfx9plus ones).
// Not allocatable; flagged as an SGPR class.
460def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
461                            (add (sequence "TTMP%u", 0, 15))> {
462  let isAllocatable = 0;
463  let HasSGPR = 1;
464}
465
466// Trap handler TMP 16-bit registers
// Low halves TTMP0_LO16 .. TTMP15_LO16. Not allocatable.
467def TTMP_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
468                              (add (sequence "TTMP%u_LO16", 0, 15))> {
469  let Size = 16;
470  let isAllocatable = 0;
471  let HasSGPR = 1;
472}
473
// TTMP tuples built from TTMP_32 with last_reg 15 and prefix "ttmp". Strides
// follow the SGPR tuples: 2 for 64-bit, 3 for 96-bit, 4 for wider tuples.
// There is no 1024-bit TTMP tuple since only 16 TTMP registers exist.
474// Trap handler TMP 64-bit registers
475def TTMP_64Regs : SIRegisterTuples<getSubRegs<2>.ret, TTMP_32, 15, 2, 2, "ttmp">;
476
477// Trap handler TMP 96-bit registers
478def TTMP_96Regs : SIRegisterTuples<getSubRegs<3>.ret, TTMP_32, 15, 3, 3, "ttmp">;
479
480// Trap handler TMP 128-bit registers
481def TTMP_128Regs : SIRegisterTuples<getSubRegs<4>.ret, TTMP_32, 15, 4, 4, "ttmp">;
482
483// Trap handler TMP 160-bit registers
484def TTMP_160Regs : SIRegisterTuples<getSubRegs<5>.ret, TTMP_32, 15, 4, 5, "ttmp">;
485
486// Trap handler TMP 192-bit registers
487def TTMP_192Regs : SIRegisterTuples<getSubRegs<6>.ret, TTMP_32, 15, 4, 6, "ttmp">;
488
489// Trap handler TMP 224-bit registers
490def TTMP_224Regs : SIRegisterTuples<getSubRegs<7>.ret, TTMP_32, 15, 4, 7, "ttmp">;
491
492// Trap handler TMP 256-bit registers
493def TTMP_256Regs : SIRegisterTuples<getSubRegs<8>.ret, TTMP_32, 15, 4, 8, "ttmp">;
494
495// Trap handler TMP 288-bit registers
496def TTMP_288Regs : SIRegisterTuples<getSubRegs<9>.ret, TTMP_32, 15, 4, 9, "ttmp">;
497
498// Trap handler TMP 320-bit registers
499def TTMP_320Regs : SIRegisterTuples<getSubRegs<10>.ret, TTMP_32, 15, 4, 10, "ttmp">;
500
501// Trap handler TMP 352-bit registers
502def TTMP_352Regs : SIRegisterTuples<getSubRegs<11>.ret, TTMP_32, 15, 4, 11, "ttmp">;
503
504// Trap handler TMP 384-bit registers
505def TTMP_384Regs : SIRegisterTuples<getSubRegs<12>.ret, TTMP_32, 15, 4, 12, "ttmp">;
506
507// Trap handler TMP 512-bit registers
508def TTMP_512Regs : SIRegisterTuples<getSubRegs<16>.ret, TTMP_32, 15, 4, 16, "ttmp">;
509
// Base for explicitly listed TTMP tuples.
//
// Parameters:
//   index   - number of the first register; used only to build the name.
//   size    - tuple width in registers.
//   subRegs - the component registers, in order.
//   indices - sub-register indices; defaults to getSubRegs<size>.ret.
//   index1  - last register number for the name; defaults to
//             index + size - 1.
//   name    - assembly name; defaults to "ttmp[index:index1]".
// HWEncoding is taken from the first component register.
510class TmpRegTuplesBase<int index, int size,
511                       list<Register> subRegs,
512                       list<SubRegIndex> indices = getSubRegs<size>.ret,
513                       int index1 = !add(index, size, -1),
514                       string name = "ttmp["#index#":"#index1#"]"> :
515  RegisterWithSubRegs<name, subRegs> {
516  let HWEncoding = subRegs[0].HWEncoding;
517  let SubRegIndices = indices;
518}
519
// Builds a 2-, 4- or 8-wide TTMP tuple for one target variant.
//
// Parameters callers are expected to pass:
//   tgt    - suffix appended to "TTMP<N>" to pick the variant (the uses in
//            this file pass "_vi" and "_gfx9plus").
//   size   - 2 or 4 select the 2- / 4-register list; any other value selects
//            the 8-register list.
//   index0 - number of the first register.
// The remaining parameters are derived defaults. Slots a given size does not
// need collapse to index0 + 1: index2/index3 when size is 2, index4..index7
// unless size is 8. Presumably this keeps every !cast below pointing at a
// register that exists -- TODO confirm.
520class TmpRegTuples<string tgt,
521                   int size,
522                   int index0,
523                   int index1 = !add(index0, 1),
524                   int index2 = !add(index0, !if(!eq(size, 2), 1, 2)),
525                   int index3 = !add(index0, !if(!eq(size, 2), 1, 3)),
526                   int index4 = !add(index0, !if(!eq(size, 8), 4, 1)),
527                   int index5 = !add(index0, !if(!eq(size, 8), 5, 1)),
528                   int index6 = !add(index0, !if(!eq(size, 8), 6, 1)),
529                   int index7 = !add(index0, !if(!eq(size, 8), 7, 1)),
530                   Register r0 = !cast<Register>("TTMP"#index0#tgt),
531                   Register r1 = !cast<Register>("TTMP"#index1#tgt),
532                   Register r2 = !cast<Register>("TTMP"#index2#tgt),
533                   Register r3 = !cast<Register>("TTMP"#index3#tgt),
534                   Register r4 = !cast<Register>("TTMP"#index4#tgt),
535                   Register r5 = !cast<Register>("TTMP"#index5#tgt),
536                   Register r6 = !cast<Register>("TTMP"#index6#tgt),
537                   Register r7 = !cast<Register>("TTMP"#index7#tgt)> :
538  TmpRegTuplesBase<index0, size,
539                   !if(!eq(size, 2), [r0, r1],
540                       !if(!eq(size, 4), [r0, r1, r2, r3],
541                                         [r0, r1, r2, r3, r4, r5, r6, r7])),
542                   getSubRegs<size>.ret>;
543
// 2-wide TTMP tuples starting at every even index, in _vi and _gfx9plus
// variants. Record names are the component names joined with "_" plus the
// variant suffix, e.g. TTMP0_TTMP1_vi.
544foreach Index = {0, 2, 4, 6, 8, 10, 12, 14} in {
545  def TTMP#Index#_TTMP#!add(Index,1)#_vi       : TmpRegTuples<"_vi",   2, Index>;
546  def TTMP#Index#_TTMP#!add(Index,1)#_gfx9plus : TmpRegTuples<"_gfx9plus", 2, Index>;
547}
548
// 4-wide TTMP tuples starting at indices 0, 4, 8 and 12.
549foreach Index = {0, 4, 8, 12} in {
550  def TTMP#Index#_TTMP#!add(Index,1)#
551                 _TTMP#!add(Index,2)#
552                 _TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi",   4, Index>;
553  def TTMP#Index#_TTMP#!add(Index,1)#
554                 _TTMP#!add(Index,2)#
555                 _TTMP#!add(Index,3)#_gfx9plus : TmpRegTuples<"_gfx9plus", 4, Index>;
556}
557
// 8-wide TTMP tuples starting at indices 0, 4 and 8 (so consecutive tuples
// overlap by four registers).
558foreach Index = {0, 4, 8} in {
559  def TTMP#Index#_TTMP#!add(Index,1)#
560                 _TTMP#!add(Index,2)#
561                 _TTMP#!add(Index,3)#
562                 _TTMP#!add(Index,4)#
563                 _TTMP#!add(Index,5)#
564                 _TTMP#!add(Index,6)#
565                 _TTMP#!add(Index,7)#_vi : TmpRegTuples<"_vi",   8, Index>;
566  def TTMP#Index#_TTMP#!add(Index,1)#
567                 _TTMP#!add(Index,2)#
568                 _TTMP#!add(Index,3)#
569                 _TTMP#!add(Index,4)#
570                 _TTMP#!add(Index,5)#
571                 _TTMP#!add(Index,6)#
572                 _TTMP#!add(Index,7)#_gfx9plus : TmpRegTuples<"_gfx9plus", 8, Index>;
573}
574
// The single 16-wide TTMP tuple, _vi variant. It is written out through
// TmpRegTuplesBase directly (TmpRegTuples only builds lists of 2, 4 or 8
// registers). With the defaulted parameters the name is "ttmp[0:15]" and the
// indices are getSubRegs<16>.ret.
575def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi :
576  TmpRegTuplesBase<0, 16,
577                   [TTMP0_vi, TTMP1_vi, TTMP2_vi, TTMP3_vi,
578                    TTMP4_vi, TTMP5_vi, TTMP6_vi, TTMP7_vi,
579                    TTMP8_vi, TTMP9_vi, TTMP10_vi, TTMP11_vi,
580                    TTMP12_vi, TTMP13_vi, TTMP14_vi, TTMP15_vi]>;
581
// Same tuple for the _gfx9plus variant.
582def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9plus :
583  TmpRegTuplesBase<0, 16,
584                   [TTMP0_gfx9plus, TTMP1_gfx9plus, TTMP2_gfx9plus, TTMP3_gfx9plus,
585                    TTMP4_gfx9plus, TTMP5_gfx9plus, TTMP6_gfx9plus, TTMP7_gfx9plus,
586                    TTMP8_gfx9plus, TTMP9_gfx9plus, TTMP10_gfx9plus, TTMP11_gfx9plus,
587                    TTMP12_gfx9plus, TTMP13_gfx9plus, TTMP14_gfx9plus, TTMP15_gfx9plus]>;
588
// Named holder for a list of value types, so the same list can be reused by
// several register classes through the `types` field.
589class RegisterTypes<list<ValueType> reg_types> {
590  list<ValueType> types = reg_types;
591}
592
// Value types for 16-bit and 32-bit register classes. Reg32Types also
// includes p2, p3, p5 and p6.
593def Reg16Types : RegisterTypes<[i16, f16]>;
594def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, p2, p3, p5, p6]>;
595
// All classes in this block are flagged HasVGPR (TSFlags bit 0).
596let HasVGPR = 1 in {
// Low 16-bit halves of v0 .. v255. No pressure set; BaseClassOrder 16.
597def VGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
598                              (add (sequence "VGPR%u_LO16", 0, 255))> {
599  let AllocationPriority = 0;
600  let Size = 16;
601  let GeneratePressureSet = 0;
602  let BaseClassOrder = 16;
603}
604
// High 16-bit halves of v0 .. v255. No pressure set; BaseClassOrder 17.
605def VGPR_HI16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
606                              (add (sequence "VGPR%u_HI16", 0, 255))> {
607  let AllocationPriority = 0;
608  let Size = 16;
609  let GeneratePressureSet = 0;
610  let BaseClassOrder = 17;
611}
612
613// VGPR 32-bit registers
614// i16/f16 only on VI+
// Value types are Reg32Types followed by Reg16Types. BaseClassOrder 32.
615def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
616                            (add (sequence "VGPR%u", 0, 255))> {
617  let AllocationPriority = 0;
618  let Size = 32;
619  let Weight = 1;
620  let BaseClassOrder = 32;
621}
622
623// Identical to VGPR_32 except it only contains the low 128 (Lo128) registers.
// It also differs in generating no pressure set and setting no BaseClassOrder.
624def VGPR_32_Lo128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
625                            (add (sequence "VGPR%u", 0, 127))> {
626  let AllocationPriority = 0;
627  let GeneratePressureSet = 0;
628  let Size = 32;
629  let Weight = 1;
630}
631} // End HasVGPR = 1
632
// VGPR tuples built from VGPR_32 with last_reg 255 and prefix "v". Every
// tuple uses stride 1, so (unlike the SGPR tuples) a tuple may start at any
// register number.
633// VGPR 64-bit registers
634def VGPR_64 : SIRegisterTuples<getSubRegs<2>.ret, VGPR_32, 255, 1, 2, "v">;
635
636// VGPR 96-bit registers
637def VGPR_96 : SIRegisterTuples<getSubRegs<3>.ret, VGPR_32, 255, 1, 3, "v">;
638
639// VGPR 128-bit registers
640def VGPR_128 : SIRegisterTuples<getSubRegs<4>.ret, VGPR_32, 255, 1, 4, "v">;
641
642// VGPR 160-bit registers
643def VGPR_160 : SIRegisterTuples<getSubRegs<5>.ret, VGPR_32, 255, 1, 5, "v">;
644
645// VGPR 192-bit registers
646def VGPR_192 : SIRegisterTuples<getSubRegs<6>.ret, VGPR_32, 255, 1, 6, "v">;
647
648// VGPR 224-bit registers
649def VGPR_224 : SIRegisterTuples<getSubRegs<7>.ret, VGPR_32, 255, 1, 7, "v">;
650
651// VGPR 256-bit registers
652def VGPR_256 : SIRegisterTuples<getSubRegs<8>.ret, VGPR_32, 255, 1, 8, "v">;
653
654// VGPR 288-bit registers
655def VGPR_288 : SIRegisterTuples<getSubRegs<9>.ret, VGPR_32, 255, 1, 9, "v">;
656
657// VGPR 320-bit registers
658def VGPR_320 : SIRegisterTuples<getSubRegs<10>.ret, VGPR_32, 255, 1, 10, "v">;
659
660// VGPR 352-bit registers
661def VGPR_352 : SIRegisterTuples<getSubRegs<11>.ret, VGPR_32, 255, 1, 11, "v">;
662
663// VGPR 384-bit registers
664def VGPR_384 : SIRegisterTuples<getSubRegs<12>.ret, VGPR_32, 255, 1, 12, "v">;
665
666// VGPR 512-bit registers
667def VGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, VGPR_32, 255, 1, 16, "v">;
668
669// VGPR 1024-bit registers
670def VGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, VGPR_32, 255, 1, 32, "v">;
671
// All classes in this block are flagged HasAGPR (TSFlags bit 1).
672let HasAGPR = 1 in {
// Low 16-bit halves of a0 .. a255. Not allocatable, no pressure set. No
// AGPR_HI16 class is defined in this section.
673def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
674                              (add (sequence "AGPR%u_LO16", 0, 255))> {
675  let isAllocatable = 0;
676  let Size = 16;
677  let GeneratePressureSet = 0;
678  let BaseClassOrder = 16;
679}
680
681// AccVGPR 32-bit registers
// Contains a0 .. a255. The value-type list is spelled out here rather than
// taken from Reg32Types / Reg16Types, and has no pointer types.
682def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
683                            (add (sequence "AGPR%u", 0, 255))> {
684  let AllocationPriority = 0;
685  let Size = 32;
686  let Weight = 1;
687  let BaseClassOrder = 32;
688}
689} // End HasAGPR = 1
690
// AGPR tuples built from AGPR_32 with last_reg 255 and prefix "a". As with
// the VGPR tuples, every tuple uses stride 1.
691// AGPR 64-bit registers
692def AGPR_64 : SIRegisterTuples<getSubRegs<2>.ret, AGPR_32, 255, 1, 2, "a">;
693
694// AGPR 96-bit registers
695def AGPR_96 : SIRegisterTuples<getSubRegs<3>.ret, AGPR_32, 255, 1, 3, "a">;
696
697// AGPR 128-bit registers
698def AGPR_128 : SIRegisterTuples<getSubRegs<4>.ret, AGPR_32, 255, 1, 4, "a">;
699
700// AGPR 160-bit registers
701def AGPR_160 : SIRegisterTuples<getSubRegs<5>.ret, AGPR_32, 255, 1, 5, "a">;
702
703// AGPR 192-bit registers
704def AGPR_192 : SIRegisterTuples<getSubRegs<6>.ret, AGPR_32, 255, 1, 6, "a">;
705
706// AGPR 224-bit registers
707def AGPR_224 : SIRegisterTuples<getSubRegs<7>.ret, AGPR_32, 255, 1, 7, "a">;
708
709// AGPR 256-bit registers
710def AGPR_256 : SIRegisterTuples<getSubRegs<8>.ret, AGPR_32, 255, 1, 8, "a">;
711
712// AGPR 288-bit registers
713def AGPR_288 : SIRegisterTuples<getSubRegs<9>.ret, AGPR_32, 255, 1, 9, "a">;
714
715// AGPR 320-bit registers
716def AGPR_320 : SIRegisterTuples<getSubRegs<10>.ret, AGPR_32, 255, 1, 10, "a">;
717
718// AGPR 352-bit registers
719def AGPR_352 : SIRegisterTuples<getSubRegs<11>.ret, AGPR_32, 255, 1, 11, "a">;
720
721// AGPR 384-bit registers
722def AGPR_384 : SIRegisterTuples<getSubRegs<12>.ret, AGPR_32, 255, 1, 12, "a">;
723
724// AGPR 512-bit registers
725def AGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, AGPR_32, 255, 1, 16, "a">;
726
727// AGPR 1024-bit registers
728def AGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, AGPR_32, 255, 1, 32, "a">;
729
730//===----------------------------------------------------------------------===//
731//  Register classes used as source and destination
732//===----------------------------------------------------------------------===//
733
// Class holding the FP_REG and SP_REG pseudo registers. Not allocatable,
// CopyCost -1, BaseClassOrder 10000.
734def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
735  (add FP_REG, SP_REG)> {
736  let isAllocatable = 0;
737  let CopyCost = -1;
738  let HasSGPR = 1;
739  let BaseClassOrder = 10000;
740}
741
// Class holding only PRIVATE_RSRC_REG, with 128-bit vector value types.
// Same non-allocatable / CopyCost -1 / BaseClassOrder 10000 settings.
742def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f16], 32,
743  (add PRIVATE_RSRC_REG)> {
744  let isAllocatable = 0;
745  let CopyCost = -1;
746  let HasSGPR = 1;
747  let BaseClassOrder = 10000;
748}
749
// Class holding only LDS_DIRECT. Declared as a plain RegisterClass rather
// than SIRegisterClass, so it has none of the HasVGPR/HasAGPR/HasSGPR fields.
750def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32,
751  (add LDS_DIRECT)> {
752  let isAllocatable = 0;
753  let CopyCost = -1;
754}
755
// Every class in this block generates no pressure set and is flagged HasSGPR.
756let GeneratePressureSet = 0, HasSGPR = 1 in {
757// Subset of SReg_32 without M0 for SMRD instructions and alike.
758// See comments in SIInstructions.td for more info.
// The member list also contains neither EXEC_LO nor EXEC_HI.
759def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
760  (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
761   SGPR_NULL, SGPR_NULL_HI, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE_LO,
762   SRC_SHARED_LIMIT_LO, SRC_PRIVATE_BASE_LO, SRC_PRIVATE_LIMIT_LO, SRC_SHARED_BASE_HI,
763   SRC_SHARED_LIMIT_HI, SRC_PRIVATE_BASE_HI, SRC_PRIVATE_LIMIT_HI, SRC_POPS_EXITING_WAVE_ID,
764   SRC_VCCZ, SRC_EXECZ, SRC_SCC)> {
765  let AllocationPriority = 0;
766}
767
// 16-bit class: the _LO16 counterparts of the registers listed in
// SReg_32_XM0_XEXEC, plus EXEC_LO_LO16, EXEC_HI_LO16 and M0_CLASS_LO16.
768def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
769  (add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_HI_LO16,
770   XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, SGPR_NULL_HI_LO16, TTMP_LO16,
771   TMA_LO_LO16, TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO_LO16,
772   SRC_SHARED_LIMIT_LO_LO16, SRC_PRIVATE_BASE_LO_LO16, SRC_PRIVATE_LIMIT_LO_LO16,
773   SRC_SHARED_BASE_HI_LO16, SRC_SHARED_LIMIT_HI_LO16, SRC_PRIVATE_BASE_HI_LO16,
774   SRC_PRIVATE_LIMIT_HI_LO16, SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16,
775   SRC_EXECZ_LO16, SRC_SCC_LO16, EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16)> {
776  let Size = 16;
777  let AllocationPriority = 0;
778  let BaseClassOrder = 16;
779}
780
// SReg_32_XM0_XEXEC plus M0: everything except EXEC_LO / EXEC_HI.
781def SReg_32_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
782  (add SReg_32_XM0_XEXEC, M0_CLASS)> {
783  let AllocationPriority = 0;
784}
785
// SReg_32_XEXEC plus EXEC_LO: everything except EXEC_HI.
786def SReg_32_XEXEC_HI : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
787  (add SReg_32_XEXEC, EXEC_LO)> {
788  let AllocationPriority = 0;
789}
790
// SReg_32_XM0_XEXEC plus EXEC_LO and EXEC_HI: everything except M0.
791def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
792  (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
793  let AllocationPriority = 0;
794}
795
796} // End GeneratePressureSet = 0, HasSGPR = 1
797
// All 32-bit scalar registers (SGPRs + special registers): SReg_32_XM0
// together with M0_CLASS.
def SReg_32
    : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
                      (add SReg_32_XM0, M0_CLASS)> {
  let HasSGPR = 1;
  let BaseClassOrder = 32;
  let AllocationPriority = 0;
}
805
806let GeneratePressureSet = 0 in {
// Non-allocatable union of SReg_32 and LDS_DIRECT_CLASS.
def SRegOrLds_32
    : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
                      (add SReg_32, LDS_DIRECT_CLASS)> {
  let HasSGPR = 1;
  let isAllocatable = 0;
}
812
// 64-bit scalar register classes. Every class in this group is flagged
// HasSGPR by the enclosing let.
let HasSGPR = 1 in {

// SGPR pairs.
def SGPR_64
    : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32,
                      (add SGPR_64Regs)> {
  let AllocationPriority = 1;
  let CopyCost = 1;
}

// TTMP pairs; not allocatable.
def TTMP_64
    : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
                      (add TTMP_64Regs)> {
  let isAllocatable = 0;
}

// 64-bit scalar registers other than EXEC.
def SReg_64_XEXEC
    : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
        (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, SGPR_NULL64,
             SRC_SHARED_BASE, SRC_SHARED_LIMIT,
             SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT,
             TTMP_64, TBA, TMA)> {
  let AllocationPriority = 1;
  let CopyCost = 1;
}

// SReg_64_XEXEC plus EXEC.
def SReg_64
    : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
                      (add SReg_64_XEXEC, EXEC)> {
  let AllocationPriority = 1;
  let CopyCost = 1;
  let BaseClassOrder = 64;
}

} // End HasSGPR = 1
841
// i1 classes spanning both the 32-bit and 64-bit scalar registers. Neither is
// allocatable; both are flagged HasSGPR and have a copy cost of 1.
let isAllocatable = 0, HasSGPR = 1, CopyCost = 1 in {

// Excludes the EXEC registers.
def SReg_1_XEXEC
    : SIRegisterClass<"AMDGPU", [i1], 32, (add SReg_64_XEXEC, SReg_32_XEXEC)>;

// SReg_1_XEXEC plus EXEC, EXEC_LO and EXEC_HI.
def SReg_1
    : SIRegisterClass<"AMDGPU", [i1], 32,
                      (add SReg_1_XEXEC, EXEC, EXEC_LO, EXEC_HI)>;

} // End isAllocatable = 0, HasSGPR = 1, CopyCost = 1
855
// Defines the scalar register classes for tuples of numRegs 32-bit registers,
// where <bits> below is numRegs * 32:
//   SGPR_<bits> - class built from regList.
//   TTMP_<bits> - non-allocatable class built from ttmpList; only emitted
//                 when a ttmpList different from regList is supplied.
//   SReg_<bits> - non-allocatable class combining the classes above.
// copyCost defaults to (numRegs + 1) >> 1.
multiclass SRegClass<int numRegs,
                     list<ValueType> regTypes,
                     SIRegisterTuples regList,
                     SIRegisterTuples ttmpList = regList,
                     int copyCost = !sra(!add(numRegs, 1), 1)> {
  // ttmpList defaults to regList, so a difference means one was passed in.
  defvar hasTTMP = !ne(regList, ttmpList);
  // Tuple width in bits, as a string for use in the record names.
  defvar suffix = !cast<string>(!mul(numRegs, 32));
  defvar sgprName = !strconcat("SGPR_", suffix);
  defvar ttmpName = !strconcat("TTMP_", suffix);

  let AllocationPriority = !sub(numRegs, 1), CopyCost = copyCost, HasSGPR = 1 in {
    def "" # sgprName : SIRegisterClass<"AMDGPU", regTypes, 32, (add regList)> {
    }

    if hasTTMP then {
      def "" # ttmpName : SIRegisterClass<"AMDGPU", regTypes, 32, (add ttmpList)> {
        let isAllocatable = 0;
      }
    }

    // Member list: the SGPR class, concatenated with the TTMP class when one
    // was defined above (otherwise with an empty add dag).
    def SReg_ # suffix :
      SIRegisterClass<"AMDGPU", regTypes, 32,
                    !con(!dag(add, [!cast<RegisterClass>(sgprName)], ["sgpr"]),
                    !if(hasTTMP,
                        !dag(add, [!cast<RegisterClass>(ttmpName)], ["ttmp"]),
                        (add)))> {
      let isAllocatable = 0;
      let BaseClassOrder = !mul(numRegs, 32);
    }
  }
}
887
// Scalar tuple classes for 96 through 384 bits. Each instantiation passes a
// separate TTMP tuple list, so SGPR_*, TTMP_* and SReg_* are all emitted.
defm "" : SRegClass<3, [v3i32, v3f32], SGPR_96Regs, TTMP_96Regs>;
defm "" : SRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], SGPR_128Regs, TTMP_128Regs>;
defm "" : SRegClass<5, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>;
defm "" : SRegClass<6, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>;
defm "" : SRegClass<7, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>;
defm "" : SRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], SGPR_256Regs, TTMP_256Regs>;
defm "" : SRegClass<9, [v9i32, v9f32], SGPR_288Regs, TTMP_288Regs>;
defm "" : SRegClass<10, [v10i32, v10f32], SGPR_320Regs, TTMP_320Regs>;
defm "" : SRegClass<11, [v11i32, v11f32], SGPR_352Regs, TTMP_352Regs>;
defm "" : SRegClass<12, [v12i32, v12f32], SGPR_384Regs, TTMP_384Regs>;

// The 512- and 1024-bit classes additionally set GlobalPriority. No TTMP list
// is passed for the 1024-bit case, so no TTMP_1024 class is emitted.
let GlobalPriority = true in {
defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64], SGPR_512Regs, TTMP_512Regs>;
defm "" : SRegClass<32, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
}
903
// Non-allocatable union of VGPR_32 and LDS_DIRECT_CLASS.
def VRegOrLds_32
    : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
                      (add VGPR_32, LDS_DIRECT_CLASS)> {
  let HasVGPR = 1;
  let isAllocatable = 0;
}
909
// Common base for the multi-register tuple classes instantiated through the
// VRegClass, ARegClass and AVRegClass multiclasses. Size, CopyCost,
// AllocationPriority and Weight are all derived from numRegs.
class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
    SIRegisterClass<"AMDGPU", regTypes, 32, regList> {
  // numRegs registers of 32 bits each.
  let Size = !mul(numRegs, 32);

  // Requires n v_mov_b32 to copy
  let CopyCost = numRegs;
  let AllocationPriority = !sub(numRegs, 1);
  let Weight = numRegs;
}
920
// Emits two VGPR tuple classes per instantiation: the plain class (NAME) and
// NAME_Align2, which keeps only every second tuple of regList and therefore
// requires an even-aligned base register.
multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
  defvar bitWidth = !mul(numRegs, 32);

  // Unrestricted class.
  def "" : VRegClassBase<numRegs, regTypes, regList> {
    let HasVGPR = 1;
    let BaseClassOrder = bitWidth;
  }

  // Even-aligned class. Its BaseClassOrder is one lower than the plain
  // class's so that it gets higher priority in base class resolution.
  def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)> {
    let HasVGPR = 1;
    let BaseClassOrder = !sub(bitWidth, 1);
  }
}
937
// VGPR tuple classes. Each defm emits VReg_<bits> and VReg_<bits>_Align2.
// VReg_64 is the only one whose type list also carries pointer types.
defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4],
                                (add VGPR_64)>;
defm VReg_96 : VRegClass<3, [v3i32, v3f32], (add VGPR_96)>;
defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add VGPR_128)>;
defm VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>;

defm VReg_192 : VRegClass<6, [v6i32, v6f32, v3i64, v3f64], (add VGPR_192)>;
defm VReg_224 : VRegClass<7, [v7i32, v7f32], (add VGPR_224)>;
defm VReg_256 : VRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16], (add VGPR_256)>;
defm VReg_288 : VRegClass<9, [v9i32, v9f32], (add VGPR_288)>;
defm VReg_320 : VRegClass<10, [v10i32, v10f32], (add VGPR_320)>;
defm VReg_352 : VRegClass<11, [v11i32, v11f32], (add VGPR_352)>;
defm VReg_384 : VRegClass<12, [v12i32, v12f32], (add VGPR_384)>;

// The 512- and 1024-bit classes additionally set GlobalPriority.
let GlobalPriority = true in {
defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64], (add VGPR_512)>;
defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>;
}
956
// Define an AGPR tuple class, along with one requiring an even aligned base
// register. Same shape as VRegClass, except that the records are flagged
// HasAGPR and the enclosing let assigns CopyCost = 2 * numRegs + 1
// (VRegClassBase itself assigns numRegs).
multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
  let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1 in {
    // Define the regular class.
    def "" : VRegClassBase<numRegs, regTypes, regList> {
      let BaseClassOrder = !mul(numRegs, 32);
    }

    // Define 2-aligned variant
    def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)> {
      // Give aligned class higher priority in base class resolution
      let BaseClassOrder = !sub(!mul(numRegs, 32), 1);
    }
  }
}
971
// AGPR tuple classes. Each defm emits AReg_<bits> and AReg_<bits>_Align2.
defm AReg_64 : ARegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16],
                        (add AGPR_64)>;
defm AReg_96 : ARegClass<3, [v3i32, v3f32], (add AGPR_96)>;
defm AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add AGPR_128)>;
defm AReg_160 : ARegClass<5, [v5i32, v5f32], (add AGPR_160)>;
defm AReg_192 : ARegClass<6, [v6i32, v6f32, v3i64, v3f64], (add AGPR_192)>;
defm AReg_224 : ARegClass<7, [v7i32, v7f32], (add AGPR_224)>;
defm AReg_256 : ARegClass<8, [v8i32, v8f32, v4i64, v4f64], (add AGPR_256)>;
defm AReg_288 : ARegClass<9, [v9i32, v9f32], (add AGPR_288)>;
defm AReg_320 : ARegClass<10, [v10i32, v10f32], (add AGPR_320)>;
defm AReg_352 : ARegClass<11, [v11i32, v11f32], (add AGPR_352)>;
defm AReg_384 : ARegClass<12, [v12i32, v12f32], (add AGPR_384)>;

// The 512- and 1024-bit classes additionally set GlobalPriority.
let GlobalPriority = true in {
defm AReg_512 : ARegClass<16, [v16i32, v16f32, v8i64, v8f64], (add AGPR_512)>;
defm AReg_1024 : ARegClass<32, [v32i32, v32f32, v16i64, v16f64], (add AGPR_1024)>;
}
989
990} // End GeneratePressureSet = 0
991
// Classes for which no pressure set is generated. Every class in this group
// is flagged HasVGPR by the enclosing let.
let GeneratePressureSet = 0, HasVGPR = 1 in {

// VReg_1 must never be used to allocate a register. It is a hack for
// SelectionDAG and should always be lowered by SILowerI1Copies. TableGen
// sorts register classes by the number of registers they contain, so this
// empty class sorts to the end and is not preferred over VGPR_32.
def VReg_1 : SIRegisterClass<"AMDGPU", [i1], 32, (add)> {
  let Size = 1;
}

// Non-allocatable unions of vector and scalar registers.
let isAllocatable = 0, HasSGPR = 1 in {
def VS_32
    : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
                      (add VGPR_32, SReg_32, LDS_DIRECT_CLASS)>;

def VS_32_Lo128
    : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
                      (add VGPR_32_Lo128, SReg_32, LDS_DIRECT_CLASS)>;

def VS_64
    : SIRegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)>;
} // End isAllocatable = 0, HasSGPR = 1

// Union of the 32-bit VGPRs and AGPRs, reusing VGPR_32's value types.
def AV_32
    : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_32)> {
  let HasAGPR = 1;
}

} // End GeneratePressureSet = 0, HasVGPR = 1
1027
// Emits two combined VGPR/AGPR tuple classes per instantiation: the plain
// class (NAME), and NAME_Align2, which keeps only every second tuple of each
// input list and therefore requires an even-aligned base register.
multiclass AVRegClass<int numRegs, list<ValueType> regTypes,
                      dag vregList,  dag aregList> {
  // Unrestricted class: vregList members followed by aregList members.
  def "" : VRegClassBase<numRegs, regTypes, (add vregList, aregList)> {
    let HasVGPR = 1;
    let HasAGPR = 1;
  }

  // Even-aligned class.
  def _Align2
      : VRegClassBase<numRegs, regTypes,
                      (add (decimate vregList, 2), (decimate aregList, 2))> {
    let HasVGPR = 1;
    let HasAGPR = 1;
  }
}
1042
// Combined VGPR/AGPR tuple classes. Each defm emits AV_<bits> and
// AV_<bits>_Align2, reusing the value-type list of the matching VReg class.
defm AV_64 : AVRegClass<2, VReg_64.RegTypes, (add VGPR_64), (add AGPR_64)>;
defm AV_96 : AVRegClass<3, VReg_96.RegTypes, (add VGPR_96), (add AGPR_96)>;
defm AV_128 : AVRegClass<4, VReg_128.RegTypes, (add VGPR_128), (add AGPR_128)>;
defm AV_160 : AVRegClass<5, VReg_160.RegTypes, (add VGPR_160), (add AGPR_160)>;
defm AV_192 : AVRegClass<6, VReg_192.RegTypes, (add VGPR_192), (add AGPR_192)>;
defm AV_224 : AVRegClass<7, VReg_224.RegTypes, (add VGPR_224), (add AGPR_224)>;
defm AV_256 : AVRegClass<8, VReg_256.RegTypes, (add VGPR_256), (add AGPR_256)>;
defm AV_288 : AVRegClass<9, VReg_288.RegTypes, (add VGPR_288), (add AGPR_288)>;
defm AV_320 : AVRegClass<10, VReg_320.RegTypes, (add VGPR_320), (add AGPR_320)>;
defm AV_352 : AVRegClass<11, VReg_352.RegTypes, (add VGPR_352), (add AGPR_352)>;
defm AV_384 : AVRegClass<12, VReg_384.RegTypes, (add VGPR_384), (add AGPR_384)>;

// The 512- and 1024-bit classes additionally set GlobalPriority.
let GlobalPriority = true in {
defm AV_512 : AVRegClass<16, VReg_512.RegTypes, (add VGPR_512), (add AGPR_512)>;
defm AV_1024 : AVRegClass<32, VReg_1024.RegTypes, (add VGPR_1024), (add AGPR_1024)>;
}
1059
1060//===----------------------------------------------------------------------===//
1061//  Register operands
1062//===----------------------------------------------------------------------===//
1063
// Assembler operand class with the given name; its operands are rendered
// through addRegOrImmOperands.
class RegImmMatcher<string name> : AsmOperandClass {
  let RenderMethod = "addRegOrImmOperands";
  let Name = name;
}
1068
// For VOP1,2,C True16 instructions. Uses first 128 32-bit VGPRs only.
// Defines the _b16_Lo128 and _f16_Lo128 operands over the register class
// named <rc><rc_suffix>_Lo128.
multiclass SIRegOperand16 <string rc, string MatchName, string opType,
                           string rc_suffix = "_32"> {
  let OperandNamespace = "AMDGPU" in {
    // 16-bit integer operand.
    def _b16_Lo128
        : RegisterOperand<!cast<RegisterClass>(
              !strconcat(rc, rc_suffix, "_Lo128"))> {
      let DecoderMethod = "decodeOperand_VSrc16";
      let ParserMatchClass = RegImmMatcher<!strconcat(MatchName, "B16_Lo128")>;
      let OperandType = !strconcat(opType, "_INT16");
    }

    // 16-bit floating-point operand.
    def _f16_Lo128
        : RegisterOperand<!cast<RegisterClass>(
              !strconcat(rc, rc_suffix, "_Lo128"))> {
      let DecoderMethod = !strconcat("decodeOperand_", rc, "_16");
      let ParserMatchClass = RegImmMatcher<!strconcat(MatchName, "F16_Lo128")>;
      let OperandType = !strconcat(opType, "_FP16");
    }
  }
}
1086
1087
// Defines the _b16, _f16, _b32, _f32, _v2b16 and _v2f16 operands over the
// register class named rc # rc_suffix. Each record's OperandType is opType
// plus a type suffix, and its parser match class is named MatchName plus a
// type suffix.
multiclass SIRegOperand32 <string rc, string MatchName, string opType,
                           string rc_suffix = "_32"> {
  let OperandNamespace = "AMDGPU" in {
    // The 16-bit integer form uses a fixed decoder name, independent of rc.
    def _b16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
      let OperandType = opType#"_INT16";
      let ParserMatchClass = RegImmMatcher<MatchName#"B16">;
      let DecoderMethod = "decodeOperand_VSrc16";
    }

    // The 16-bit float form derives its decoder name from rc.
    def _f16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
      let OperandType = opType#"_FP16";
      let ParserMatchClass = RegImmMatcher<MatchName#"F16">;
      let DecoderMethod = "decodeOperand_" # rc # "_16";
    }

    // The 32-bit forms derive their decoder name from rc and rc_suffix.
    def _b32 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
      let OperandType = opType#"_INT32";
      let ParserMatchClass = RegImmMatcher<MatchName#"B32">;
      let DecoderMethod = "decodeOperand_" # rc # rc_suffix;
    }

    def _f32 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
      let OperandType = opType#"_FP32";
      let ParserMatchClass = RegImmMatcher<MatchName#"F32">;
      let DecoderMethod = "decodeOperand_" # rc # rc_suffix;
    }

    // Both packed 16-bit forms share one fixed decoder name.
    def _v2b16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
      let OperandType = opType#"_V2INT16";
      let ParserMatchClass = RegImmMatcher<MatchName#"V2B16">;
      let DecoderMethod = "decodeOperand_VSrcV216";
    }

    def _v2f16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
      let OperandType = opType#"_V2FP16";
      let ParserMatchClass = RegImmMatcher<MatchName#"V2F16">;
      let DecoderMethod = "decodeOperand_VSrcV216";
    }
  }
}
1128
// Defines the _b64 and _f64 operands over the register class named
// rc # rc_suffix and, when Vectors is set, the packed 32-bit _v2f32 and
// _v2b32 operands as well.
multiclass SIRegOperand64 <string rc, string MatchName, string opType,
                           string rc_suffix = "_64", bit Vectors = 1> {
  let OperandNamespace = "AMDGPU" in {
    // The scalar 64-bit forms do not assign a DecoderMethod here.
    def _b64 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
      let OperandType = opType#"_INT64";
      let ParserMatchClass = RegImmMatcher<MatchName#"B64">;
    }

    def _f64 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
      let OperandType = opType#"_FP64";
      let ParserMatchClass = RegImmMatcher<MatchName#"F64">;
    }

    // Packed 32-bit forms are only emitted when Vectors is non-zero.
    if Vectors then
    def _v2f32 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
      let OperandType = opType#"_V2FP32";
      let ParserMatchClass = RegImmMatcher<MatchName#"V2FP32">;
      let DecoderMethod = "decodeOperand_VSrcV232";
    }
    if Vectors then
    def _v2b32 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
      let OperandType = opType#"_V2INT32";
      let ParserMatchClass = RegImmMatcher<MatchName#"V2INT32">;
      let DecoderMethod = "decodeOperand_VSrcV232";
    }
  }
}
1156
// Both the 32-bit and the 64-bit operand sets, with default class suffixes.
multiclass SIRegOperand <string rc, string MatchName, string opType> :
  SIRegOperand32<rc, MatchName, opType>,
  SIRegOperand64<rc, MatchName, opType>;

// Full operand set with OPERAND_REG_IMM operand types.
// FIXME: 64-bit sources can sometimes use 32-bit constants.
multiclass RegImmOperand <string rc, string MatchName>
  : SIRegOperand<rc, MatchName, "OPERAND_REG_IMM">;

// Full operand set with OPERAND_REG_INLINE_C operand types.
multiclass RegInlineOperand <string rc, string MatchName>
  : SIRegOperand<rc, MatchName, "OPERAND_REG_INLINE_C">;

// 32-bit operand set only, OPERAND_REG_INLINE_C, with a selectable suffix.
multiclass RegInlineOperand32 <string rc, string MatchName,
                               string rc_suffix = "_32">
  : SIRegOperand32<rc, MatchName, "OPERAND_REG_INLINE_C", rc_suffix>;

// 64-bit operand set only, OPERAND_REG_INLINE_C, with a selectable suffix.
multiclass RegInlineOperand64 <string rc, string MatchName,
                               string rc_suffix = "_64">
  : SIRegOperand64<rc, MatchName, "OPERAND_REG_INLINE_C", rc_suffix>;

// 32-bit operand set only, OPERAND_REG_INLINE_AC, with a selectable suffix.
multiclass RegInlineOperandAC <string rc, string MatchName,
                               string rc_suffix = "_32">
  : SIRegOperand32<rc, MatchName, "OPERAND_REG_INLINE_AC", rc_suffix>;

// 64-bit operand set only, OPERAND_REG_INLINE_AC. Passes Vectors = 0, so the
// _v2f32 and _v2b32 operands are not emitted.
multiclass RegInlineOperandAC64 <string rc, string MatchName,
                                 string rc_suffix = "_64">
  : SIRegOperand64<rc, MatchName, "OPERAND_REG_INLINE_AC", rc_suffix, 0>;
1183
1184//===----------------------------------------------------------------------===//
1185//  SSrc_* Operands with an SGPR or a 32-bit immediate
1186//===----------------------------------------------------------------------===//
1187
// SSrc_{b16,f16,b32,f32,v2b16,v2f16,b64,f64,v2f32,v2b32} over SReg_32 and
// SReg_64.
defm SSrc : RegImmOperand<"SReg", "SSrc">;

// 32-bit integer source operand over SRegOrLds_32.
def SSrcOrLds_b32 : RegisterOperand<SRegOrLds_32> {
  let ParserMatchClass = RegImmMatcher<"SSrcOrLdsB32">;
  let OperandType = "OPERAND_REG_IMM_INT32";
  let OperandNamespace = "AMDGPU";
}
1195
1196//===----------------------------------------------------------------------===//
//  SCSrc_* Operands with an SGPR or an inline constant
1198//===----------------------------------------------------------------------===//
1199
// Scalar-register operand set with OPERAND_REG_INLINE_C operand types.
defm SCSrc : RegInlineOperand<"SReg", "SCSrc">;
1201
1202//===----------------------------------------------------------------------===//
1203//  VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
1204//===----------------------------------------------------------------------===//
1205
// VSrc_* covers the VS_32 / VS_64 classes; VSrcT_* covers VS_32_Lo128.
defm VSrc : RegImmOperand<"VS", "VSrc">;
defm VSrcT : SIRegOperand16<"VS", "VSrcT", "OPERAND_REG_IMM">;

// Operand over the VReg_128 class, decoded with DecodeVS_128RegisterClass.
def VSrc_128 : RegisterOperand<VReg_128> {
  let DecoderMethod = "DecodeVS_128RegisterClass";
}
1212
1213//===----------------------------------------------------------------------===//
1214//  VSrc_*_Deferred Operands with an SGPR, VGPR or a 32-bit immediate for use
1215//  with FMAMK/FMAAK
1216//===----------------------------------------------------------------------===//
1217
// Deferred-immediate 16-bit float operand over the register class named
// <rc><rc_suffix>_Lo128.
multiclass SIRegOperand16_Deferred <string rc, string MatchName, string opType,
                           string rc_suffix = "_32"> {
  let OperandNamespace = "AMDGPU" in {
    def _f16_Lo128_Deferred
        : RegisterOperand<!cast<RegisterClass>(
              !strconcat(rc, rc_suffix, "_Lo128"))> {
      let DecoderMethod = !strconcat("decodeOperand_", rc, "_16_Deferred");
      let ParserMatchClass = RegImmMatcher<!strconcat(MatchName, "F16_Lo128")>;
      let OperandType = !strconcat(opType, "_FP16_DEFERRED");
    }
  }
}
1228
// Defines the _f16_Deferred and _f32_Deferred operands over the register
// class named rc # rc_suffix. The parser match class names are the same
// MatchName#"F16" / MatchName#"F32" strings that SIRegOperand32 uses.
multiclass SIRegOperand32_Deferred <string rc, string MatchName, string opType,
                           string rc_suffix = "_32"> {
  let OperandNamespace = "AMDGPU" in {
    def _f16_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
      let OperandType = opType#"_FP16_DEFERRED";
      let ParserMatchClass = RegImmMatcher<MatchName#"F16">;
      let DecoderMethod = "decodeOperand_" # rc # "_16_Deferred";
    }

    // Note: the decoder name hard-codes "_32" rather than using rc_suffix.
    def _f32_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
      let OperandType = opType#"_FP32_DEFERRED";
      let ParserMatchClass = RegImmMatcher<MatchName#"F32">;
      let DecoderMethod = "decodeOperand_" # rc # "_32_Deferred";
    }
  }
}
1245
// Emits VSrc_f16_Deferred, VSrc_f32_Deferred and VSrcT_f16_Lo128_Deferred.
defm VSrc : SIRegOperand32_Deferred<"VS", "VSrc", "OPERAND_REG_IMM">;
defm VSrcT : SIRegOperand16_Deferred<"VS", "VSrcT", "OPERAND_REG_IMM">;
1248
1249//===----------------------------------------------------------------------===//
1250//  VRegSrc_* Operands with a VGPR
1251//===----------------------------------------------------------------------===//
1252
// This is for operands with the enum(9), VSrc encoding restriction,
// but only allows VGPRs.
// The register class is VGPR_32, while the decoder is the VS_32 one.
def VRegSrc_32 : RegisterOperand<VGPR_32> {
  //let ParserMatchClass = RegImmMatcher<"VRegSrc32">;
  let DecoderMethod = "DecodeVS_32RegisterClass";
}

// The wider variants use the decodeOperand_VReg_<bits> decoders.
def VRegSrc_64 : RegisterOperand<VReg_64> {
  let DecoderMethod = "decodeOperand_VReg_64";
}

def VRegSrc_128 : RegisterOperand<VReg_128> {
  let DecoderMethod = "decodeOperand_VReg_128";
}

def VRegSrc_256 : RegisterOperand<VReg_256> {
  let DecoderMethod = "decodeOperand_VReg_256";
}
1271
1272//===----------------------------------------------------------------------===//
1273// VGPRSrc_*
1274//===----------------------------------------------------------------------===//
1275
// An 8-bit RegisterOperand wrapper for a VGPR
def VGPRSrc_32 : RegisterOperand<VGPR_32> {
  let DecoderMethod = "DecodeVGPR_32RegisterClass";
}
// Same decoder as VGPRSrc_32, but over the VGPR_32_Lo128 class.
def VGPRSrc_32_Lo128 : RegisterOperand<VGPR_32_Lo128> {
  let DecoderMethod = "DecodeVGPR_32RegisterClass";
}
1283
1284//===----------------------------------------------------------------------===//
//  ARegSrc_* Operands with an AccVGPR
1286//===----------------------------------------------------------------------===//
1287
// Operand over the AGPR_32 class, encoded through getAVOperandEncoding.
def ARegSrc_32 : RegisterOperand<AGPR_32> {
  let EncoderMethod = "getAVOperandEncoding";
  let DecoderMethod = "DecodeAGPR_32RegisterClass";
}
1292
1293//===----------------------------------------------------------------------===//
1294//  VCSrc_* Operands with an SGPR, VGPR or an inline constant
1295//===----------------------------------------------------------------------===//
1296
// VCSrc_* covers the VS_32 / VS_64 classes; VCSrcT_* covers VS_32_Lo128.
// Both use OPERAND_REG_INLINE_C operand types.
defm VCSrc : RegInlineOperand<"VS", "VCSrc">;
defm VCSrcT : SIRegOperand16<"VS", "VCSrcT", "OPERAND_REG_INLINE_C">;
1299
1300//===----------------------------------------------------------------------===//
1301//  VISrc_* Operands with a VGPR or an inline constant
1302//===----------------------------------------------------------------------===//
1303
// VISrc_* over VGPR_32. The 64- and 256-bit variants instantiate the 64-bit
// operand set (OPERAND_REG_INLINE_C) under an explicit DecoderMethod let; the
// 128-, 512- and 1024-bit variants instantiate the 32-bit operand set with
// OPERAND_REG_INLINE_AC operand types and the wider class suffix.
defm VISrc : RegInlineOperand32<"VGPR", "VISrc">;
let DecoderMethod = "decodeOperand_VReg_64" in
defm VISrc_64   : RegInlineOperand64<"VReg", "VISrc_64",   "_64">;
defm VISrc_128  : RegInlineOperandAC<"VReg", "VISrc_128",  "_128">;
let DecoderMethod = "decodeOperand_VReg_256" in
defm VISrc_256  : RegInlineOperand64<"VReg", "VISrc_256",  "_256">;
defm VISrc_512  : RegInlineOperandAC<"VReg", "VISrc_512",  "_512">;
defm VISrc_1024 : RegInlineOperandAC<"VReg", "VISrc_1024", "_1024">;
1312
1313//===----------------------------------------------------------------------===//
1314//  AVSrc_*, AVDst_*, AVLdSt_* Operands with an AGPR or VGPR
1315//===----------------------------------------------------------------------===//
1316
// Every AV operand is encoded through getAVOperandEncoding; only the register
// class and the decoder differ between them.
let EncoderMethod = "getAVOperandEncoding" in {

// Source operands.
def AVSrc_32 : RegisterOperand<AV_32> {
  let DecoderMethod = "DecodeAV_32RegisterClass";
}
def AVSrc_64 : RegisterOperand<AV_64> {
  let DecoderMethod = "DecodeAV_64RegisterClass";
}
def AVSrc_128 : RegisterOperand<AV_128> {
  let DecoderMethod = "DecodeAV_128RegisterClass";
}

// Destination operands.
def AVDst_128 : RegisterOperand<AV_128> {
  let DecoderMethod = "DecodeAVDst_128RegisterClass";
}
def AVDst_512 : RegisterOperand<AV_512> {
  let DecoderMethod = "DecodeAVDst_512RegisterClass";
}

// Load/store data operands.
def AVLdSt_32 : RegisterOperand<AV_32> {
  let DecoderMethod = "DecodeAVLdSt_32RegisterClass";
}
def AVLdSt_64 : RegisterOperand<AV_64> {
  let DecoderMethod = "DecodeAVLdSt_64RegisterClass";
}
def AVLdSt_96 : RegisterOperand<AV_96> {
  let DecoderMethod = "DecodeAVLdSt_96RegisterClass";
}
def AVLdSt_128 : RegisterOperand<AV_128> {
  let DecoderMethod = "DecodeAVLdSt_128RegisterClass";
}
def AVLdSt_160 : RegisterOperand<AV_160> {
  let DecoderMethod = "DecodeAVLdSt_160RegisterClass";
}

} // End EncoderMethod = "getAVOperandEncoding"
1366
1367//===----------------------------------------------------------------------===//
//  AISrc_* Operands with an AGPR or an inline constant
1369//===----------------------------------------------------------------------===//
1370
// AISrc_* over AGPR_32 and the AReg_128/512/1024 classes: the 32-bit operand
// set with OPERAND_REG_INLINE_AC operand types.
defm AISrc      : RegInlineOperandAC<"AGPR", "AISrc">;
defm AISrc_128  : RegInlineOperandAC<"AReg", "AISrc_128",  "_128">;
defm AISrc_512  : RegInlineOperandAC<"AReg", "AISrc_512",  "_512">;
defm AISrc_1024 : RegInlineOperandAC<"AReg", "AISrc_1024", "_1024">;

// The 64- and 256-bit variants instantiate the 64-bit operand set (without
// the packed 32-bit forms) under an explicit DecoderMethod let.
let DecoderMethod = "decodeOperand_AReg_64" in
defm AISrc_64   : RegInlineOperandAC64<"AReg", "AISrc_64",   "_64">;
let DecoderMethod = "decodeOperand_AReg_256" in
defm AISrc_256  : RegInlineOperandAC64<"AReg", "AISrc_256",  "_256">;
1380